# Introduction to Pandas and Numpy

## Part 1: Introduction to Numpy

In [1]:
#Installation and Setup
!pip install numpy
import numpy as np



In [2]:
#Create an Array From a List
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1)

#Create an Array of Zeros
arr2 = np.zeros(5)
print(arr2)

#Create an Array of Ones
arr3 = np.ones((3, 3))
print(arr3)

#Create an Array of Evenly Spaced Values
arr4 = np.arange(0, 10, 2)
print(arr4)

#Create an Array of Random Values
arr5 = np.random.rand(3, 3)
print(arr5)

[1 2 3 4 5]
[0. 0. 0. 0. 0.]
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
[0 2 4 6 8]
[[0.59517958 0.09573772 0.90143071]
 [0.6609939  0.44803587 0.25252079]
 [0.01877787 0.98489837 0.54294981]]


In [3]:
#Array Attributes: Shape, Size, Dtype

#Shape of the Array
print(arr1.shape)

#Size of the Array
print(arr1.size)

#Data Type of the Array
print(arr1.dtype)

(5,)
5
int64


In [4]:
#Indexing and Slicing Arrays

#Accessing Elements
print(arr1[0])

#Slicing
print(arr1[1:4])

1
[2 3 4]


In [5]:
#Array Operations: Arithmetic, Aggregation, Broadcasting

#Arithmetic Operations
arr6 = arr1 + arr2
print(arr6)

#Aggregation Functions
print(np.sum(arr1))

#Broadcasting
arr7 = arr1 * 2
print(arr7)

[1. 2. 3. 4. 5.]
15
[ 2  4  6  8 10]


In [6]:
#Reshaping Arrays
arr8 = np.arange(9).reshape(3, 3)
print(arr8)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [7]:
#Stacking and Splitting Arrays

#Stacking Arrays (Vertically)
arr9 = np.vstack((arr8, arr8))
print(arr9)

#Splitting Arrays
arr10, arr11 = np.split(arr9, 2)
print(arr10, arr11)

[[0 1 2]
 [3 4 5]
 [6 7 8]
 [0 1 2]
 [3 4 5]
 [6 7 8]]
[[0 1 2]
 [3 4 5]
 [6 7 8]] [[0 1 2]
 [3 4 5]
 [6 7 8]]


In [8]:
#Transposing Arrays
arr12 = arr8.T
print(arr12)

[[0 3 6]
 [1 4 7]
 [2 5 8]]


In [9]:
#Universal Functions (ufuncs)/Advanced Numpy Functions

#Sin
arr13 = np.sin(arr1)
print(arr13)

#Fancy Indexing
indices = np.array([0, 2, 4])
print(arr1[indices])

#Boolean Indexing
bool_arr = arr1 > 3
print(arr1[bool_arr])

#Vectorized Operations
arr14 = arr1 + 10
print(arr14)

#Broadcasting Pt.2
arr15 = arr1 + np.array([[10], [20], [30], [40], [50]])
print(arr15)

[ 0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427]
[1 3 5]
[4 5]
[11 12 13 14 15]
[[11 12 13 14 15]
 [21 22 23 24 25]
 [31 32 33 34 35]
 [41 42 43 44 45]
 [51 52 53 54 55]]


In [10]:
#Implementing Matrix Operations: Matrix Multiplication

#Create two matrices
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

#Multiply Matrices
C = np.dot(A, B)
print(C)

#Matrix Inversion
A_inv = np.linalg.inv(A)
print(A_inv)

[[19 22]
 [43 50]]
[[-2.   1. ]
 [ 1.5 -0.5]]


## Part 2: Introduction to Pandas

In [11]:
#Installation and Setup
!pip install pandas
import pandas as pd



In [12]:
#Create a Series From a List
s1 = pd.Series([1, 2, 3, 4, 5])
print(s1)

#Create a Series From a Array
s2 = pd.Series(np.array([1, 2, 3, 4, 5]))
print(s2)

#Create a Series From a Dictionary
s3 = pd.Series({'a': 1, 'b': 2, 'c': 3})
print(s3)

0    1
1    2
2    3
3    4
4    5
dtype: int64
0    1
1    2
2    3
3    4
4    5
dtype: int64
a    1
b    2
c    3
dtype: int64


In [13]:
#Indexing and Slicing Series

#Accessing Elememts by Label
print(s3['a'])

#Accessing Elememts by Position
print(s3[0])

#Slicing
print(s3[:2])

1
1
a    1
b    2
dtype: int64


  print(s3[0])


In [14]:
#Operations on Series

#Arithmetic Operations
s4 = s1 + s2
print(s4)

#Element-wise Operations
s5 = s1 * 2
print(s5)

#Aggregation Function
print(s1.sum())

0     2
1     4
2     6
3     8
4    10
dtype: int64
0     2
1     4
2     6
3     8
4    10
dtype: int64
15


In [15]:
#Handling Missing Data

#Drop Missing Values
s6 = s1.dropna()

#Fill Missing Values
s7 = s1.fillna(0)

#Check for Missing Values
print(s1.isnull())

0    False
1    False
2    False
3    False
4    False
dtype: bool


In [16]:
#Creating DataFrames

#Create DataFrame From a Dictionary
data = {'Name': ['Alice', 'Bob', 'Charlie'], 'Age': [25, 30, 35]}
df1 = pd.DataFrame(data)
print(df1)

#Create DataFrame From a List
data = [['Alice', 25], ['Bob', 30], ['Charlie', 35]]
df2 = pd.DataFrame(data, columns = ['Name', 'Age'])
print(df2)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


In [17]:
#Indexing and Slicing DataFrames

#Label-based Indexing
print(df1.loc[0, 'Name'])

#Position-based Indexing
print(df1.iloc[0, 0])

#Slicing
print(df1[:2])

Alice
Alice
    Name  Age
0  Alice   25
1    Bob   30


In [19]:
#Basic Operations

#Sorting
df1_sorted = df1.sort_values(by = 'Age')
print(df1_sorted)

#Filtering
df1_filtered = df1[df1['Age'] > 30]
print(df1_filtered)

#Selecting Columns
names = df1['Name']
print(names)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
      Name  Age
2  Charlie   35
0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object


In [22]:
#Data Manipulation

#Adding a Column
df1['Gender'] = ['Female', 'Male', 'Male']
print(df1)

#Deleting a Column
del df1['Gender']
print(df1)

#Updating a Column
df1['Age'] = df1['Age'] + 1
print(df1)

      Name  Age  Gender
0    Alice   25  Female
1      Bob   30    Male
2  Charlie   35    Male
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
      Name  Age
0    Alice   26
1      Bob   31
2  Charlie   36


In [23]:
#Handling Missing Data

#Drop Missing Values
df1_cleaned = df1.dropna()

#Fill Missing Values
df1_filled = df1.fillna(0)

#Check for Missing Values
print(df1.isnull().any())

Name    False
Age     False
dtype: bool


In [28]:
#Concatenating DataFrames

#Create Simple DataFrames
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']})
df2 = pd.DataFrame({'A': ['A3', 'A4', 'A5'], 'B': ['B3', 'B4', 'B5']})

#Concatenate Along Rows
result_row = pd.concat([df1, df2])
print(result_row)

#Concatenate Along Columns
result_col = pd.concat([df1, df2], axis = 1)
print(result_col)

    A   B
0  A0  B0
1  A1  B1
2  A2  B2
0  A3  B3
1  A4  B4
2  A5  B5
    A   B   A   B
0  A0  B0  A3  B3
1  A1  B1  A4  B4
2  A2  B2  A5  B5


In [35]:
#Merging DataFrames

#Create Simple DataFrames
left = pd.DataFrame({'Key' : ['K0', 'K1', 'K2'], 'Value' : ['V0', 'V1', 'V2']})
right = pd.DataFrame({'Key' : ['K3', 'K4', 'K5'], 'Value' : ['V3', 'V4', 'V5']})

#Inner Join
inner_join = pd.merge(left, right, on = 'Key', how = 'inner')
print(inner_join)

#Left Join
left_join = pd.merge(left, right, on = 'Key', how = 'left')
print(left_join)

#Right Join
right_join = pd.merge(left, right, on = 'Key', how = 'right')
print(right_join)

#Outer Join
outer_join = pd.merge(left, right, on = 'Key', how = 'outer')
print(outer_join)

Empty DataFrame
Columns: [Key, Value_x, Value_y]
Index: []
  Key Value_x Value_y
0  K0      V0     NaN
1  K1      V1     NaN
2  K2      V2     NaN
  Key Value_x Value_y
0  K3     NaN      V3
1  K4     NaN      V4
2  K5     NaN      V5
  Key Value_x Value_y
0  K0      V0     NaN
1  K1      V1     NaN
2  K2      V2     NaN
3  K3     NaN      V3
4  K4     NaN      V4
5  K5     NaN      V5


In [42]:
#Joining DataFrames

##Create Simple DataFrames
left = pd.DataFrame({'Value1' : [1, 2, 3]}, index = ['A', 'B', 'C'])
right = pd.DataFrame({'Value2' : [4, 5, 6]}, index = ['D', 'E', 'F'])

#Join Based on Index
join_df = left.join(right, how = 'inner')
print(join_df)

Empty DataFrame
Columns: [Value1, Value2]
Index: []
