# Introduction to Pandas and NumPy

In [1]:
import pandas
import numpy

In [7]:
# Importing with aliases

import pandas as pd
import numpy as np

## Getting Started with Pandas

In [8]:
# Build a pandas Series from an ordinary Python list;
# pandas attaches a default integer index (0, 1, 2, ...) to the values.
my_list = [1, 2, 3, 4, 5]
my_series = pd.Series(data=my_list)
print(my_series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [9]:
# Column-oriented source data for a DataFrame:
# every key is a column name and every list holds that column's values.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Emma'],
    Age=[25, 30, 35, 40, 45],
    City=['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'],
)

print(data)

{'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Emma'], 'Age': [25, 30, 35, 40, 45], 'City': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']}


In [10]:
# Turn the dictionary into a DataFrame: one column per key, one row per list position
df = pd.DataFrame(data)

# Displaying the DataFrame
print(df)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
3    David   40      Houston
4     Emma   45      Phoenix


In [11]:
# Dictionary keys are column names
# Dictionary values are data values
# NOTE(review): 'Couse' looks like a typo for 'Course'; it is left unchanged here
# because it is the column name that appears in the recorded output.

my_dict = {'Couse': ['Python', 'Conda', 'LLMs'], 'Complexity': ['Beginner', 'Intermediate', 'Advanced']}
print(my_dict)
my_df = pd.DataFrame(my_dict)
print(my_df)

{'Couse': ['Python', 'Conda', 'LLMs'], 'Complexity': ['Beginner', 'Intermediate', 'Advanced']}
    Couse    Complexity
0  Python      Beginner
1   Conda  Intermediate
2    LLMs      Advanced


In [12]:
# Shape of the dataset: a (number of rows, number of columns) tuple

print(df.shape)

(5, 3)


In [13]:
# Same check for the smaller frame: 3 rows, 2 columns
print(my_df.shape)

(3, 2)


In [15]:
# Read in a dataset
# The path is relative, so "sample_100.csv" must sit in the notebook's working directory.

sample = pd.read_csv("sample_100.csv")

In [17]:
# Size of the loaded data, followed by a truncated view of its rows
print(sample.shape)
print(sample)

(100, 2)
                                              reviews  label
0   Absolutely wonderful - silky and sexy and comf...      1
1   Love this dress!  it's sooo pretty.  i happene...      1
2   I had such high hopes for this dress and reall...      0
3   I love, love, love this jumpsuit. it's fun, fl...      1
4   This shirt is very flattering to all due to th...      1
..                                                ...    ...
95  I was very excited to order this top in red xs...      0
96  I am in need of easy comfortable tops for ever...      0
97  At first i wasn't sure about it. the neckline ...      1
98  I find that this brand can be a little bit all...      1
99  This top is absolutely stunning. i purchased t...      1

[100 rows x 2 columns]


In [20]:
# Get the first 14 rows (head() returns the first 5 when called without an argument)

print(sample.head(14))

                                              reviews  label
0   Absolutely wonderful - silky and sexy and comf...      1
1   Love this dress!  it's sooo pretty.  i happene...      1
2   I had such high hopes for this dress and reall...      0
3   I love, love, love this jumpsuit. it's fun, fl...      1
4   This shirt is very flattering to all due to th...      1
5   I love tracy reese dresses, but this one is no...      0
6   I aded this in my basket at hte last mintue to...      1
7   I ordered this in carbon for store pick up, an...      0
8   I love this dress. i usually get an xs but it ...      1
9   I'm 5"5' and 125 lbs. i ordered the s petite t...      1
10  Dress runs small esp where the zipper area run...      0
11  This dress is perfection! so pretty and flatte...      1
12  More and more i find myself reliant on the rev...      1
13  Bought the black xs to go under the larkspur m...      0


In [21]:
# Get the last 8 rows (tail() returns the last 5 when called without an argument)

print(sample.tail(8))

                                              reviews  label
92  This shirt caught my eye because of how beauti...      0
93  Purchased this top online, and when i received...      1
94  I usually wear a medium and bought a small. it...      0
95  I was very excited to order this top in red xs...      0
96  I am in need of easy comfortable tops for ever...      0
97  At first i wasn't sure about it. the neckline ...      1
98  I find that this brand can be a little bit all...      1
99  This top is absolutely stunning. i purchased t...      1


In [22]:
# Reminder of what df currently holds before selecting from it
print(df)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
3    David   40      Houston
4     Emma   45      Phoenix


In [23]:
# Accessing columns
# A single column label in square brackets returns that column as a Series.

print(df['Name'])

0      Alice
1        Bob
2    Charlie
3      David
4       Emma
Name: Name, dtype: object


In [24]:
# Numeric column: the Series keeps the integer dtype
print(df['Age'])

0    25
1    30
2    35
3    40
4    45
Name: Age, dtype: int64


In [25]:
# Selecting several columns: a list of labels inside the brackets returns a DataFrame

print(df[['Name', 'City']])

      Name         City
0    Alice     New York
1      Bob  Los Angeles
2  Charlie      Chicago
3    David      Houston
4     Emma      Phoenix


In [26]:
# iloc (integer location: select by position) VS loc (select by label)

# Row at position 1, returned as a Series whose index is the column names
print(df.iloc[1])

Name            Bob
Age              30
City    Los Angeles
Name: 1, dtype: object


In [30]:
# Single cell by position: row 1, column 0
print(df.iloc[1,0])

Bob


In [32]:
# Single cell by label: index label 0, column 'Name'
print(df.loc[0, 'Name'])

Alice


In [34]:
# ':' selects every row; the second argument picks the column by label
print(df.loc[:, "Age"])

0    25
1    30
2    35
3    40
4    45
Name: Age, dtype: int64


In [35]:
# Every row, two columns chosen by a list of labels
print(df.loc[:, ["Name", "City"]])

      Name         City
0    Alice     New York
1      Bob  Los Angeles
2  Charlie      Chicago
3    David      Houston
4     Emma      Phoenix


In [36]:
# Label-based row range: with loc both endpoints are included, so rows 1, 2 and 3 are returned
print(df.loc[1:3, ["Name", "City"]])

      Name         City
1      Bob  Los Angeles
2  Charlie      Chicago
3    David      Houston


In [37]:
# Shape before a column is added: 5 rows, 3 columns
print(df.shape)

(5, 3)


In [43]:
# Adding a new column
# Assigning a list to a new label creates the column; the list supplies one value per row.

df['Gender'] = ['male', 'female', 'female', 'male', 'male']
print(df)

      Name  Age         City  Gender
0    Alice   25     New York    male
1      Bob   30  Los Angeles  female
2  Charlie   35      Chicago  female
3    David   40      Houston    male
4     Emma   45      Phoenix    male


In [44]:
# Renaming a column
# `columns` maps old label -> new label; inplace=True changes df itself
# instead of returning a renamed copy.

print(df)
df.rename(columns={'Gender': 'gender'}, inplace=True)
print("\nDataFrame after renaming column:")
print(df)

      Name  Age         City  Gender
0    Alice   25     New York    male
1      Bob   30  Los Angeles  female
2  Charlie   35      Chicago  female
3    David   40      Houston    male
4     Emma   45      Phoenix    male

DataFrame after renaming column:
      Name  Age         City  gender
0    Alice   25     New York    male
1      Bob   30  Los Angeles  female
2  Charlie   35      Chicago  female
3    David   40      Houston    male
4     Emma   45      Phoenix    male


In [45]:
# Dropping a column

## Inplace
# axis=1 tells drop() that the labels refer to columns; inplace=True modifies df directly.
print(df)
df.drop(['gender'], axis=1, inplace=True)
print("")
print(df)

      Name  Age         City  gender
0    Alice   25     New York    male
1      Bob   30  Los Angeles  female
2  Charlie   35      Chicago  female
3    David   40      Houston    male
4     Emma   45      Phoenix    male

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
3    David   40      Houston
4     Emma   45      Phoenix


In [47]:
# By assignment
# Without inplace=True, drop() returns a new DataFrame, which is bound back to df.
print(df)
# The label must name an existing column (an unknown label raises KeyError);
# here the 'City' column is removed.
df = df.drop(['City'], axis=1)
print("")
print(df)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
3    David   40      Houston
4     Emma   45      Phoenix

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40
4     Emma   45


In [55]:
# Handling missing values
# Column 'A' contains one gap; pandas stores the None as NaN, which turns the
# column into floats.
data_with_missing = dict(A=[1, 2, None, 4], B=[5, 6, 7, 8])
df_with_missing = pd.DataFrame(data=data_with_missing)
print("\nDataFrame with missing values:", df_with_missing, sep="\n")


DataFrame with missing values:
     A  B
0  1.0  5
1  2.0  6
2  NaN  7
3  4.0  8


In [56]:
# Choosing a fill value for missing data:
# - Numeric columns: use the mean or the median
# - Text (categorical) columns: use the mode (the most frequent value)

In [59]:
# Filling missing values
# Every NaN in the frame is replaced by the constant 0 (a simple placeholder,
# not a mean or median); inplace=True updates df_with_missing directly.

df_with_missing.fillna(0, inplace=True)
print("\nDataFrame after filling missing values:")
print(df_with_missing)


DataFrame after filling missing values:
     A  B
0  1.0  5
1  2.0  6
2  0.0  7
3  4.0  8


In [60]:
# Add a text column whose first entry is missing (None)
df_with_missing['gender'] = [None, 'male', 'male', 'female']
print(df_with_missing)

     A  B  gender
0  1.0  5    None
1  2.0  6    male
2  0.0  7    male
3  4.0  8  female


In [61]:
# Fill the missing text value with the most frequent value (the mode).
# mode() returns a Series because several values can tie for most frequent.
mode_ = df_with_missing['gender'].mode()
print(df_with_missing)
# Fill with the scalar mode_.iloc[0]: passing the Series itself would align on
# index labels and only fill a gap that happens to sit at label 0.
# The result is assigned back to the column rather than relying on
# fillna(inplace=True) applied to a column selection.
df_with_missing['gender'] = df_with_missing['gender'].fillna(mode_.iloc[0])
print(df_with_missing)

     A  B  gender
0  1.0  5    None
1  2.0  6    male
2  0.0  7    male
3  4.0  8  female
     A  B  gender
0  1.0  5    male
1  2.0  6    male
2  0.0  7    male
3  4.0  8  female


In [62]:
# mode_ is a Series, not a plain string: here it has a single entry at label 0
print(mode_)

0    male
Name: gender, dtype: object


In [66]:
# Add a 'gender' column to df so the rows can be grouped by it
df['gender'] = ['male', 'male', 'female', 'female', 'male']

In [68]:
# df now has Name, Age and gender
print(df)

      Name  Age  gender
0    Alice   25    male
1      Bob   30    male
2  Charlie   35  female
3    David   40  female
4     Emma   45    male


In [71]:
# Split the rows by 'gender', then take the median 'Age' within each group
print("\nAggregating data:")
print(df.groupby('gender')['Age'].median())  # result is a Series indexed by gender


Aggregating data:
gender
female    37.5
male      30.0
Name: Age, dtype: float64


In [73]:
# Concatenating two datasets
# Two small frames with identical column names, prepared for the concat step.
data1 = dict(A=[1, 2, 3], B=[4, 5, 6])
data2 = dict(A=[7, 8, 9], B=[10, 11, 12])

df1, df2 = pd.DataFrame(data1), pd.DataFrame(data2)

print(f"df1: \n{df1} \ndf2: \n{df2}")

df1: 
   A  B
0  1  4
1  2  5
2  3  6 
df2: 
   A   B
0  7  10
1  8  11
2  9  12


In [74]:
# Stack df2 underneath df1. Each frame keeps its own index labels, so 0, 1, 2
# appear twice in the result; pd.concat(..., ignore_index=True) would renumber them.
df_concatenated = pd.concat([df1, df2])
print("\nConcatenated DataFrame:")
print(df_concatenated)


Concatenated DataFrame:
   A   B
0  1   4
1  2   5
2  3   6
0  7  10
1  8  11
2  9  12


In [75]:
# df is unchanged by the concatenation example above
print(df)

      Name  Age  gender
0    Alice   25    male
1      Bob   30    male
2  Charlie   35  female
3    David   40  female
4     Emma   45    male


In [87]:
# Saving a csv file
# index=False leaves the row index out of the file, so only columns A, B and gender are written.

df_with_missing.to_csv("class_data1.csv", index=False)

In [88]:
_ = pd.read_csv("class_data1.csv")

In [89]:
print(_)

     A  B  gender
0  1.0  5    male
1  2.0  6    male
2  0.0  7    male
3  4.0  8  female


## Getting Started with NumPy

In [90]:
# Creating a 1D array
# np.array converts a flat Python list into a one-dimensional ndarray.

arr_1d = np.array([1, 2, 3, 4, 5])
print("1D Array:", arr_1d, sep="\n")

1D Array:
[1 2 3 4 5]


In [91]:
# NumPy arrays are instances of numpy.ndarray
print(type(arr_1d))

<class 'numpy.ndarray'>


In [92]:
# Creating a 2D array (matrix)
# Each inner list becomes one row of the array.

arr_2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print("\n2D Array:")
print(arr_2d)


2D Array:
[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [93]:
# Shape of the array: (rows, columns)
print("\nShape of the array:")
print(arr_2d.shape)

# Number of dimensions (axes): 2 for a matrix
print("\nNumber of dimensions:")
print(arr_2d.ndim)

# Data type of the elements (one dtype is shared by the whole array)
print("\nData type of the elements:")
print(arr_2d.dtype)


Shape of the array:
(3, 3)

Number of dimensions:
2

Data type of the elements:
int64


In [94]:
# Floating-point input values give the array a float dtype
arr = np.array([[0.1, 0.2], [2.1, 2.2]])
print(arr.dtype)

float64


In [95]:
# Reminder of the 3x3 matrix used in the indexing examples below
print(arr_2d)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [96]:
# Accessing elements with [row, column] positions (both start at 0)
print("\nAccessing elements:")
print(arr_2d[0, 0])  # row 0, column 0
print(arr_2d[1, 1])  # row 1, column 1


Accessing elements:
1
5


In [99]:
# Accessing multiple elements with slices
print("\nAccessing elements:")
print(arr_2d[0:2, 0:2])  # rows 0-1 and columns 0-1; the stop index 2 is excluded
print("")
print(arr_2d[1, :])  # the whole of row 1


Accessing elements:
[[1 2]
 [4 5]]

[4 5 6]


In [104]:
# Create a sequence of values with linspace

# np.linspace(start, stop, how many values?)
# The values are evenly spaced and include both start and stop.
arr = np.linspace(1, 100, 10)
print("\nSequence of values:")
print(arr)


Sequence of values:
[  1.  12.  23.  34.  45.  56.  67.  78.  89. 100.]


In [107]:
# Flatten array: collapse a 2D array into one dimension, row by row

arr = np.array([[1, 2, 3], [4, 5, 6]])  # 2 rows x 3 columns
print(arr)
print(arr.shape)

print("")
flat_arr = arr.flatten()  # 1D array with all 6 elements
print(flat_arr)
print(flat_arr.shape)

[[1 2 3]
 [4 5 6]]
(2, 3)

[1 2 3 4 5 6]
(6,)


In [108]:
# Array used in the element-wise arithmetic examples that follow
a = np.array([1, 2, 5, 3])

In [111]:
my_list = [1, 2, 5, 3]

# A plain Python list needs an explicit loop to add 1 to every element;
# each slot is overwritten in place with its incremented value.
for position, value in enumerate(my_list):
    my_list[position] = value + 1

print(my_list)

[2, 3, 6, 4]


In [112]:
# With a NumPy array the same increment is one expression: 1 is added to every element
print(a+1)

[2 3 6 4]


In [113]:
# Subtraction is applied element by element as well
print(a-3)

[-2 -1  2  0]


In [114]:
# Raise to power: every element is cubed

print(a**3)

[  1   8 125  27]


In [115]:
# Exponential: e raised to each element

print(np.exp(a))

[  2.71828183   7.3890561  148.4131591   20.08553692]


In [116]:
# Square root of each element (the result is a float array)
print(np.sqrt(a))

[1.         1.41421356 2.23606798 1.73205081]


In [118]:
# Modify the existing array in place

# Gives the same values as a = a * 2, but updates `a` itself, so every re-run
# of this cell doubles the array again.
# NOTE(review): the recorded output [4 8 20 12] is four times [1 2 5 3], which
# suggests this cell was executed twice.
a *= 2
print ("Doubled each element of original array:", a)

Doubled each element of original array: [ 4  8 20 12]


In [121]:
# Transpose of an array: rows become columns and columns become rows
a = np.array([[1, 2, 3], [3, 4, 5]])  # `a` is rebound to a new 2x3 array

print ("\nOriginal array:\n", a)
print(a.shape)
print ("Transpose of array:\n", a.T)  # .T gives the transposed view
print(a.T.shape)  # (2, 3) becomes (3, 2)


Original array:
 [[1 2 3]
 [3 4 5]]
(2, 3)
Transpose of array:
 [[1 3]
 [2 4]
 [3 5]]
(3, 2)


In [122]:
# Arithmetic between two arrays of the same shape works position by position
arr1 = np.array([[1, 2], [3, 4]])
arr2 = np.array([[5, 6], [7, 8]])
print(f"arr1:\n{arr1}\narr2:\n{arr2}")

print("Addition:")
print(arr1 + arr2)

# `*` multiplies matching elements (1*5, 2*6, ...); it is not the matrix product
print("Multiplication:")
print(arr1 * arr2)

arr1:
[[1 2]
 [3 4]]
arr2:
[[5 6]
 [7 8]]
Addition:
[[ 6  8]
 [10 12]]
Multiplication:
[[ 5 12]
 [21 32]]


In [130]:
# np.arange(start, stop) produces the same integers as range(start, stop),
# but returns them as a NumPy array rather than a range object

print(np.arange(0, 10))
print("")
for i in range(0, 10):
    print(i)

[0 1 2 3 4 5 6 7 8 9]

0
1
2
3
4
5
6
7
8
9


In [134]:
# Reshaping arrays

print("\nReshaping arrays:")
arr = np.arange(1, 10)  # the 9 integers 1..9
print("Original array:")
print(arr)

# Make sure the length of the array matches the reshape arguments
# (here 9 elements == 3 * 3)
reshaped_arr = arr.reshape(3, 3)
print("Reshaped array:")
print(reshaped_arr)


Reshaping arrays:
Original array:
[1 2 3 4 5 6 7 8 9]
Reshaped array:
[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [132]:
# A bare expression on the last line of a cell is displayed as the cell's output
reshaped_arr.shape

(3, 3)

In [135]:
# Stacking arrays

print(arr1)
print(arr2)
print("")

print("\nStacking arrays:")
# vstack places arr2 below arr1: two 2x2 arrays become one 4x2 array
stacked_arr = np.vstack((arr1, arr2))
print("Vertical stack:")
print(stacked_arr)
# hstack places arr2 to the right of arr1: the result is 2x4
# (stacked_arr is reused, so the vertical result is replaced)
stacked_arr = np.hstack((arr1, arr2))
print("Horizontal stack:")
print(stacked_arr)

[[1 2]
 [3 4]]
[[5 6]
 [7 8]]


Stacking arrays:
Vertical stack:
[[1 2]
 [3 4]
 [5 6]
 [7 8]]
Horizontal stack:
[[1 2 5 6]
 [3 4 7 8]]


In [137]:
# Random number generation
# randint(1, 10, size=(3, 3)) draws a 3x3 array of integers from 1 up to, but not including, 10.
# No seed is set, so the values differ on every run and will not match the recorded output.

print("\nRandom number generation:")
rand_arr = np.random.randint(1, 10, size=(3, 3))
print(rand_arr)


Random number generation:
[[1 1 8]
 [5 9 6]
 [3 9 4]]


In [138]:
# Saving and loading arrays

# Save: writes the array to 'saved_array.npy' in the working directory
np.save('saved_array.npy', rand_arr)

# Load: reads the same file back into a new array
loaded_arr = np.load('saved_array.npy')

print("\nLoaded array:")
print(loaded_arr)


Loaded array:
[[1 1 8]
 [5 9 6]
 [3 9 4]]


In [141]:
# Matrix operations and functions

matrix = np.array([[1, 2, 3], [2, 3, 4], [2, 4, 6], [3, 5, 1]])
print(matrix)

# Each entry pairs the label to print with the NumPy reduction to apply.
# No axis is given, so every element of the matrix takes part.
reductions = [
    ("\nSum of all elements:", np.sum),
    ("Mean of all elements:", np.mean),
    ("Median of all elements:", np.median),
    ("Maximum element:", np.max),
    ("Minimum element:", np.min),
]

for label, reduce_all in reductions:
    print(label, reduce_all(matrix))

[[1 2 3]
 [2 3 4]
 [2 4 6]
 [3 5 1]]

Sum of all elements: 36
Mean of all elements: 3.0
Median of all elements: 3.0
Maximum element: 6
Minimum element: 1


In [145]:
# Finding the determinant of a matrix
# linalg - Linear Algebra
# det - Determinant
# The result is computed in floating point, so it can carry a tiny rounding
# error (the recorded output is -6874.000000000006).

matrix = np.array([[11, 2, 31], [22, 32, 49], [29, 4, 60]])
print(matrix.shape)
print("\nDeterminant of matrix:")
determinant = np.linalg.det(matrix)
print(determinant)

(3, 3)

Determinant of matrix:
-6874.000000000006


In [146]:
# Finding the inverse of a matrix
# np.linalg.inv returns the matrix that, multiplied by the original, gives the identity.

matrix = np.array([[1, 2],
                    [3, 4]])

print("\nInverse of matrix:")
inverse_matrix = np.linalg.inv(matrix)
print(inverse_matrix)


Inverse of matrix:
[[-2.   1. ]
 [ 1.5 -0.5]]


In [147]:
# Solving linear equations
# A holds the coefficients and b the right-hand sides of the system
#   2x + y = 4
#    x + y = 3

A = np.array([[2, 1], [1, 1]])
print(A)
print("")
b = np.array([4, 3])
print(b)

print("\nSolving linear equations:")
# solve(A, b) returns the vector x that satisfies A @ x = b
x = np.linalg.solve(A, b)
print("Solution:", x)

[[2 1]
 [1 1]]

[4 3]

Solving linear equations:
Solution: [1. 2.]


In [149]:
# A second system, written as coefficients (A) and right-hand sides (b):
#   x + y =  3
#   x - y = -1
A = np.array([[1, 1], [1, -1]])
print(A)
print("")
b = np.array([3, -1])
print(b)

[[ 1  1]
 [ 1 -1]]

[ 3 -1]


In [150]:
# Solve the system; the result holds one value per unknown, in column order
answer = np.linalg.solve(A, b)
print(answer)

[1. 2.]


In [151]:
# Pull the individual unknowns out of the solution vector
x = answer[0]
y = answer[1]

print(x, y)

1.0 2.0
