## NumPy

In [1]:
import numpy as np

In [2]:
# Per-employee base salary values (three entries each).
jeff_salary = [2000, 3000, 3000]
nick_salary = [2600, 2800, 2800]
tom_salary = [2300, 2500, 2500]

# Combine the lists into a 2-D array: one row per employee, one column per entry.
base_salary = np.array(
    [
        jeff_salary,
        nick_salary,
        tom_salary,
    ]
)
print(base_salary)

[[2000 3000 3000]
 [2600 2800 2800]
 [2300 2500 2500]]


In [3]:
# Per-employee bonus values, laid out the same way as the base salaries.
jeff_bonus = [500, 400, 400]
nick_bonus = [600, 300, 400]
tom_bonus = [200, 500, 400]

# Same layout as base_salary (row = employee) so the two arrays line up element by element.
bonus = np.array(
    [
        jeff_bonus,
        nick_bonus,
        tom_bonus,
    ]
)
print(bonus)

[[500 400 400]
 [600 300 400]
 [200 500 400]]


## Performing Element-wise operations

In [4]:
# Adding two equally-shaped arrays works element by element: every base
# salary is increased by the bonus stored at the same row/column position.
salary_bonus = base_salary + bonus

# Single print call, one item per line: the result's type, then its values.
print(type(salary_bonus), salary_bonus, sep='\n')

<class 'numpy.ndarray'>
[[2500 3400 3400]
 [3200 3100 3200]
 [2500 3000 2900]]


## Using NumPy Statistical Functions

In [5]:
# With no axis argument, max() scans the whole 2-D array and returns a single scalar.
print(salary_bonus.max())

3400


In [6]:
# axis=1 collapses the columns, leaving one maximum per row (i.e. per employee).
print(np.amax(salary_bonus, axis=1))

[3400 3200 3000]


In [7]:
# axis=0 collapses the rows, leaving one maximum per column (the highest value across employees at each position).
print(np.amax(salary_bonus, axis=0))

[3200 3400 3400]


## Pandas

In [8]:
import pandas as pd
# Employee names; without an explicit index the Series is labelled 0, 1, 2.
data = [
    'Jeff Russell',
    'Jane Boorman',
    'Tom Heints',
]
emps_names = pd.Series(data=data)
print(emps_names)

0    Jeff Russell
1    Jane Boorman
2      Tom Heints
dtype: object


In [9]:
# Same names, this time labelled with employee numbers instead of 0..n-1.
data = [
    'Jeff Russell',
    'Jane Boorman',
    'Tom Heints',
]
emps_names = pd.Series(
    data,
    index=[9001, 9002, 9003],
)
print(emps_names)

9001    Jeff Russell
9002    Jane Boorman
9003      Tom Heints
dtype: object


## Accessing data in a series

In [11]:
# Plain [] with a scalar looks the value up by index label (9001), not by position.
print(emps_names[9001])

Jeff Russell


In [12]:
# .loc is the explicit label-based accessor: 9001 is an index label.
print(emps_names.loc[9001])

Jeff Russell


In [13]:
# .iloc is position-based: 0 is the first element regardless of its label.
print(emps_names.iloc[0])

Jeff Russell


In [14]:
# Label slices with .loc include both endpoints, so 9001 and 9002 are both returned.
print(emps_names.loc[9001:9002])

9001    Jeff Russell
9002    Jane Boorman
dtype: object


In [15]:
# Positional slices with .iloc exclude the stop position, so only positions 0 and 1 are returned.
print(emps_names.iloc[0:2])

9001    Jeff Russell
9002    Jane Boorman
dtype: object


In [16]:
# A slice inside plain [] is applied by position here (same rows as .iloc[0:2]),
# even though a scalar such as [9001] is looked up by label.
print(emps_names[0:2])

9001    Jeff Russell
9002    Jane Boorman
dtype: object


## Combining series into a dataframe

In [17]:
# Name the existing Series so it supplies the 'names' column header.
emps_names.name = 'names'

# Second Series on the same employee-number index; its name becomes the column header.
data = ['Jeff.Russell', 'Jane.Boorman', 'Tom.Heints']
emps_emails = pd.Series(data, name='emails', index=[9001, 9002, 9003])

# Place the two Series side by side as columns, aligned on their shared index.
df = pd.concat([emps_names, emps_emails], axis='columns')
print(df)

             names        emails
9001  Jeff Russell  Jeff.Russell
9002  Jane Boorman  Jane.Boorman
9003    Tom Heints    Tom.Heints


## Creating pandas DataFrames

## yfinance module

In [18]:
import yfinance as yf
# Download the last five trading days of TSLA price history.
tkr = yf.Ticker('TSLA')

# Drop the two corporate-action columns, then round-trip the index so the
# frame ends up keyed by 'Date'.
hist = (
    tkr.history(period="5d")
    .drop("Dividends", axis=1)
    .drop("Stock Splits", axis=1)
    .reset_index()
    .set_index('Date')
)

In [19]:
# The previous cell already made 'Date' the index, so an unconditional
# set_index('Date') raises KeyError (the column no longer exists).
# Only promote 'Date' when it is still a regular column, which keeps this
# cell safe under Restart & Run All and when re-run on its own.
if 'Date' in hist.columns:
    hist = hist.set_index('Date')

In [20]:
# Bare last expression: the notebook renders the DataFrame as a table.
hist

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-01-28 00:00:00-05:00,396.910004,400.589996,386.5,398.089996,48910700
2025-01-29 00:00:00-05:00,395.209991,398.589996,384.480011,389.100006,68033600
2025-01-30 00:00:00-05:00,410.779999,412.5,384.410004,400.279999,98092900
2025-01-31 00:00:00-05:00,401.529999,419.98999,401.339996,404.600006,83283600
2025-02-03 00:00:00-05:00,386.690002,389.170013,374.390015,382.390015,71566665


In [21]:
# (rows, columns): five trading days by the five remaining columns (Open, High, Low, Close, Volume).
hist.shape

(5, 5)

## JSON module

In [26]:
import json
from io import StringIO

import pandas as pd

# Salary records keyed by employee number.
data = [
    {"Empno": 9001, "Salary": 3000},
    {"Empno": 9002, "Salary": 2800},
    {"Empno": 9003, "Salary": 2500},
]

# Serialize to a JSON string, as if it had been received from an external source.
json_data = json.dumps(data)

# Passing a literal JSON string to read_json is deprecated and emits a
# FutureWarning; wrap the text in a file-like StringIO buffer instead.
salary = pd.read_json(StringIO(json_data))

# Key the frame by employee number so it can be joined with other Empno-indexed frames.
salary = salary.set_index('Empno')
print(salary)

       Salary
Empno        
9001     3000
9002     2800
9003     2500


  salary = pd.read_json(json_data)


In [28]:
import pandas as pd
# Raw employee records; note that the employee numbers are strings here.
data = [
    ['9001', 'Jeff Russell', 'sales'],
    ['9002', 'Jane Boorman', 'sales'],
    ['9003', 'Tom Heints', 'sales'],
]
column_types = {'Empno': int, 'Name': str, 'Job': str}

# Build the frame, cast each column to its declared type (Empno: str -> int),
# then key the rows by employee number.
emps = (
    pd.DataFrame(data, columns=['Empno', 'Name', 'Job'])
    .astype(column_types)
    .set_index('Empno')
)
print(emps)

               Name    Job
Empno                     
9001   Jeff Russell  sales
9002   Jane Boorman  sales
9003     Tom Heints  sales


## Combining Dataframes

In [98]:
# join() aligns the two frames on their shared Empno index.
# At this point in a top-to-bottom run emps holds 9001-9003 only; the 9004 row
# is appended to emps in a later cell.
emps_salary = emps.join(salary)
print(emps_salary)

               Name    Job  Salary
Empno                             
9001   Jeff Russell  sales    3000
9002   Jane Boorman  sales    2800
9003     Tom Heints  sales    2500


In [99]:
# Build the new employee's record; the Series name (9004) is used as the row label (Empno).
new_emp = pd.Series({'Name': 'John Hardy', 'Job': 'sales'}, name = 9004)
# Assigning to a label that does not exist yet appends the row to emps in place.
# NOTE: this mutates emps, so cells that join on emps give different results
# depending on whether they run before or after this cell.
emps.loc[new_emp.name] = new_emp


In [100]:
# Confirm that the 9004 row is now part of emps.
print(emps)

               Name    Job
Empno                     
9001   Jeff Russell  sales
9002   Jane Boorman  sales
9003     Tom Heints  sales
9004     John Hardy  sales


In [103]:
# join() aligns on the Empno index and keeps every row of emps. 9004 has no
# entry in salary, so its Salary is NaN, which turns the column into floats.
emps_salary = emps.join(salary)
print(emps_salary)

               Name    Job  Salary
Empno                             
9001   Jeff Russell  sales  3000.0
9002   Jane Boorman  sales  2800.0
9003     Tom Heints  sales  2500.0
9004     John Hardy  sales     NaN


In [104]:
# how='inner' keeps only the Empno values present in both frames, so 9004
# (no salary row) is dropped and Salary stays an integer column.
emps_salary = emps.join(salary, how = 'inner')
print(emps_salary)


               Name    Job  Salary
Empno                             
9001   Jeff Russell  sales    3000
9002   Jane Boorman  sales    2800
9003     Tom Heints  sales    2500


## One-to-many joins

In [105]:
import pandas as pd
# One row per order: order number (Pono), employee number (Empno), order total.
# An employee number may appear on several rows.
data = [
    [2608, 9001, 35],
    [2617, 9001, 35],
    [2620, 9001, 139],
    [2621, 9002, 95],
    [2626, 9002, 218],
]
orders = pd.DataFrame(
    data=data,
    columns=['Pono', 'Empno', 'Total'],
)
print(orders)

   Pono  Empno  Total
0  2608   9001     35
1  2617   9001     35
2  2620   9001    139
3  2621   9002     95
4  2626   9002    218


In [106]:
# One-to-many merge: each employee row is repeated once per matching order.
# "Empno" is the index level of emps and a regular column of orders.
# The inner merge drops employees without orders; the result is then keyed
# by order number.
emp_orders = emps.merge(orders,
                        how="inner",
                        left_on="Empno",
                        right_on="Empno").set_index("Pono")
# Print the variable that was just assigned. The original printed
# `emps_orders`, a name never defined in this notebook, which raises
# NameError on a fresh kernel.
print(emp_orders)

      Empno          Name    Job  Total
Pono                                   
2608   9001  Jeff Russell  sales     35
2617   9001  Jeff Russell  sales     35
2620   9001  Jeff Russell  sales    139
2621   9002  Jane Boorman  sales     95
2626   9002  Jane Boorman  sales    218


In [107]:
# Average order total per employee number.
print(orders.groupby(['Empno'])["Total"].mean())

Empno
9001     69.666667
9002    156.500000
Name: Total, dtype: float64


In [108]:
# Sum of order totals per employee number.
print(orders.groupby(['Empno'])["Total"].sum())

Empno
9001    209
9002    313
Name: Total, dtype: int64


## Loading the sample dataset into a pandas DataFrame

In [111]:
import pandas as pd
# Read the tab-separated review file, labelling the two columns explicitly.
# NOTE(review): passing names= presumes the file has no header row — confirm against the data file.
# NOTE(review): this rebinds df, which earlier in the notebook held the names/emails frame.
df = pd.read_csv('amazon_cells_labelled.txt', names=['review', 'sentiment'], sep='\t')

In [115]:
# Preview the first five rows to check that the review text and 0/1 sentiment labels loaded as expected.
df.head()

Unnamed: 0,review,sentiment
0,So there is no way for me to plug it in here i...,0
1,"Good case, Excellent value.",1
2,Great for the jawbone.,1
3,Tied to charger for conversations lasting more...,0
4,The mic is great.,1


## Splitting the sample dataset into a training set and a test set

In [114]:
from sklearn.model_selection import train_test_split
# Pull the text and label columns out as plain arrays.
reviews, sentiments = df['review'].values, df['sentiment'].values

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
(reviews_train, reviews_test,
 sentiment_train, sentiment_test) = train_test_split(
    reviews,
    sentiments,
    test_size=0.2,
    random_state=500,
)


## Transforming text into numerical feature vectors

In [116]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words encoder: one feature column per vocabulary word, valued by its count.
vectorizer = CountVectorizer()
# Learn the vocabulary from the training reviews only. Fitting on the full
# `reviews` array (as before) lets held-out test text shape the feature
# space, which is data leakage from the test set into preprocessing.
vectorizer.fit(reviews_train)
# Encode both splits with the same training-derived vocabulary; words that
# appear only in the test reviews are ignored by transform().
X_train = vectorizer.transform(reviews_train)
X_test = vectorizer.transform(reviews_test)

## Training and Evaluating the model

In [117]:
from sklearn.linear_model import LogisticRegression
# Logistic regression classifier created with the library's default settings.
classifier = LogisticRegression()
# Fit on the vectorized training reviews and their sentiment labels.
classifier.fit(X_train, sentiment_train) 

In [118]:
# Score the fitted model on the held-out test features and labels.
accuracy = classifier.score(X_test, sentiment_test)
print("Accuracy:", accuracy) 

Accuracy: 0.81


## Making predictions on new data

In [120]:
# Unseen example sentences to classify.
new_reviews = ['Old version of python useless', 'Very good effort', 'Clear and concise']
# Reuse the already-fitted vectorizer (transform only, no refit) so the new
# texts are encoded with the same feature columns the model was trained on.
X_new = vectorizer.transform(new_reviews)
# One predicted label per input sentence.
print(classifier.predict(X_new)) 

[0 1 1]
