## NUMPY

NumPy is a fundamental library for scientific computing in Python. It provides support for arrays and matrices, along with a collection of mathematical functions that operate on these data structures. In this lesson, we will cover the basics of NumPy, focusing on arrays and vectorized operations.

In [3]:
import numpy as np

## Create arrays using NumPy
# Build a one-dimensional array from a plain Python list.
arr_1D = np.array([1, 2, 3])

# Emit the contents, the Python type, and the shape tuple on separate lines.
print(arr_1D, type(arr_1D), arr_1D.shape, sep="\n")

[1 2 3]
<class 'numpy.ndarray'>
(3,)


In [8]:
# Reshaping: arrange the six elements as 2 rows by 3 columns.
arr_1D = np.array([1, 2, 3, 4, 5, 6])
reshape = np.reshape(arr_1D, (2, 3))
print(reshape)

[[1 2 3]
 [4 5 6]]


In [11]:
# A 2-D array built from a nested list: two identical rows of five values.
arr2 = np.array([
    [1, 2, 3, 4, 5],
    [1, 2, 3, 4, 5],
])
print(arr2, arr2.shape, sep="\n")

[[1 2 3 4 5]
 [1 2 3 4 5]]
(2, 5)


In [13]:
# Integers 0..9 (default start 0 and step 1) laid out as 2 rows by 5 columns.
np.arange(10).reshape(2, 5)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [14]:
# 5x5 array filled with ones; the displayed values are floats (1.).
np.ones(shape=(5, 5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [18]:
'''identity matrix'''
# 5x5 matrix with ones on the main diagonal and zeros everywhere else.
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [19]:
'''Numpy vectorized operation'''
# Two equal-length operands; each operation below pairs them up position by position.
arr1 = np.array([1, 2, 3, 4, 5])
arr2 = np.array([10, 20, 30, 40, 50])

# Element-wise addition
print(np.add(arr1, arr2))

# Element-wise subtraction
print(np.subtract(arr1, arr2))

# Element-wise (true) division: the result is floating point
print(np.divide(arr1, arr2))

[11 22 33 44 55]
[ -9 -18 -27 -36 -45]
[0.1 0.1 0.1 0.1 0.1]


In [20]:
'''Universal Function'''
# Sample input shared by the ufunc calls below.
arr = np.array([1, 2, 3, 4, 5])

# Element-wise square root, sine, and natural logarithm, one result per line.
print(np.sqrt(arr), np.sin(arr), np.log(arr), sep="\n")

[1.         1.41421356 1.73205081 2.         2.23606798]
[ 0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427]
[0.         0.69314718 1.09861229 1.38629436 1.60943791]


In [31]:
# 3x4 grid holding the integers 1..12 in row-major order.
arr = np.arange(1, 13).reshape((3, 4))
print(arr)

# Single element at row 1, column 1, addressed with one tuple index.
print(arr[1, 1])

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
6


In [35]:
# Rows from index 1 onward, columns from index 2 onward.
print(arr[1:, 2:])

[[ 7  8]
 [11 12]]


In [42]:
# Rows 0 and 1 (stop index 2 is exclusive), columns from index 2 onward.
print(arr[0:2 , 2:])
# Rows from index 1 onward, columns 1 and 2 (stop index 3 is exclusive).
print(arr[1: , 1:3])

[[3 4]
 [7 8]]
[[ 6  7]
 [10 11]]


In [44]:
'''Changing all the numericals'''
# Write the scalar 100 into every column of rows 1 onward, in place;
# row 0 keeps its original values.
arr[1:, :] = 100
print(arr)

[[  1   2   3   4]
 [100 100 100 100]
 [100 100 100 100]]


In [47]:
'''Logical Operation'''
import numpy as np

# Rebuild the 3x4 grid of 1..12 so this cell does not depend on the earlier
# in-place edit that overwrote rows 1 and 2 with 100.
arr = np.arange(1, 13).reshape((3, 4))

# Element-wise comparison: yields a boolean array with the same shape as arr.
# The right-hand operand was missing (a SyntaxError). 5 is the integer threshold
# that matches the recorded output: False for 1..5, True for 6..12.
arr > 5

array([[False, False, False, False],
       [False,  True,  True,  True],
       [ True,  True,  True,  True]])

In [48]:
# Boolean mask that is True wherever the element is even.
mask = np.mod(arr, 2) == 0
# Indexing with the mask keeps only the selected elements, as a 1-D array.
print(arr[mask])

[ 2  4  6  8 10 12]


In [3]:
import numpy as np

# 24 consecutive integers arranged as 2 blocks of 3 rows by 4 columns.
arr = np.arange(2 * 3 * 4).reshape(2, 3, 4)
print(arr)
# Blank line, then the shape tuple.
print(f'\n{arr.shape}')

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]

(2, 3, 4)


## Pandas

In [10]:
import pandas as pd
import numpy as np

In [4]:
'''Series'''
# A Series built from a plain list; the printed index runs 0..4.
data = list(range(1, 6))
series = pd.Series(data=data)
print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [6]:
'''Series form dictionary'''
# Dictionary keys become the index labels; the values become the data.
data = dict(a=1, b=2, c=3)
series_dict = pd.Series(data)
print(series_dict)

a    1
b    2
c    3
dtype: int64


In [7]:
'''Series using data and index'''
# Values and their labels are supplied as two separate, equal-length lists.
data = [1, 2, 3]
index = list('abc')

series = pd.Series(data, index=index)
print(series)

a    1
b    2
c    3
dtype: int64


In [22]:
'''Dataframe'''
# Column-oriented input: each key is a column name and each list holds
# that column's values, one per row.
data = dict(
    Name=['A', 'B', 'C'],
    Age=[22, 23, 45],
    City=["Kolkata", "Banglore", "Delhi"],
    pin=[111, 121, 131],
)

df = pd.DataFrame(data)
print(df, type(df), sep="\n")

# NOTE(review): reshaping the 3x4 values to 4x3 re-chunks the flattened rows
# three items at a time. It does not transpose, so each printed row mixes
# fields from different records. Confirm this is the intended demonstration.
arr = np.reshape(np.array(df), (4, 3))
print(arr)

  Name  Age      City  pin
0    A   22   Kolkata  111
1    B   23  Banglore  121
2    C   45     Delhi  131
<class 'pandas.core.frame.DataFrame'>
[['A' 22 'Kolkata']
 [111 'B' 23]
 ['Banglore' 121 'C']
 [45 'Delhi' 131]]


In [23]:
# Bare last expression: the notebook renders the DataFrame as a table.
df

Unnamed: 0,Name,Age,City,pin
0,A,22,Kolkata,111
1,B,23,Banglore,121
2,C,45,Delhi,131


In [30]:
# Add a new 'Salary' column by assigning a list with one value per row.
# This modifies df itself, so later cells see the extra column.
df['Salary'] = [100 , 200 , 300]
df

Unnamed: 0,Name,Age,City,pin,Salary
0,A,22,Kolkata,111,100
1,B,23,Banglore,121,200
2,C,45,Delhi,131,300


In [37]:
'''State not changing so the previous value persist'''
# drop() hands back a new frame without the row labelled 0. The result is only
# displayed, never assigned, so df itself still holds every row.
df.drop(index=0)

Unnamed: 0,Name,Age,City,pin,Salary
1,B,23,Banglore,121,200
2,C,45,Delhi,131,300


In [36]:
# Show df: all three rows, including the one labelled 0, are present.
df

Unnamed: 0,Name,Age,City,pin,Salary
0,A,22,Kolkata,111,100
1,B,23,Banglore,121,200
2,C,45,Delhi,131,300


In [38]:
'''State will change and the new value persist'''
# inplace=True removes the 'Salary' column from df itself (the call returns None),
# so the change is visible when df is displayed next.
df.drop(columns='Salary', inplace=True)
df

Unnamed: 0,Name,Age,City,pin
0,A,22,Kolkata,111
1,B,23,Banglore,121
2,C,45,Delhi,131


In [27]:
import pandas as pd

# Lift the display limits so no rows, columns, or cell contents are truncated.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", None)

# Load the dataset from the working directory and preview the first five rows
# (head() defaults to 5).
df = pd.read_csv("student_data.csv")
df.head()


Unnamed: 0,StudentID,Name,Age,Gender,Subject,Marks
0,1,Alice Johnson,20,Female,Math,85.0
1,2,Bob Smith,21,Male,English,78.0
2,3,Charlie Lee,19,Male,Science,90.0
3,4,Daisy Evans,22,Female,History,88.0
4,5,Elijah Brown,20,Male,Math,92.0


In [26]:
# Last ten rows of the frame.
df.tail(10)

Unnamed: 0,StudentID,Name,Age,Gender,Subject,Marks
90,91,Mike Jenkins,20,Male,Science,
91,92,Nina Flores,19,Female,History,83.0
92,93,Omar Hayes,22,Male,Math,88.0
93,94,Paula Ramirez,21,Female,English,87.0
94,95,Quincy Turner,20,Male,Science,90.0
95,96,Rita Campbell,19,Female,History,
96,97,Sean Hill,22,Male,Math,89.0
97,98,Tara Lewis,21,Female,English,85.0
98,99,Uriel Young,20,Male,Science,87.0
99,100,Vanessa Scott,19,Female,History,81.0


In [29]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns. The count for Marks is lower than 100 because of missing values.
df.describe()

Unnamed: 0,StudentID,Age,Marks
count,100.0,100.0,87.0
mean,50.5,20.48,85.977011
std,29.011492,1.11446,4.215081
min,1.0,19.0,76.0
25%,25.75,19.75,83.0
50%,50.5,20.0,87.0
75%,75.25,21.0,89.0
max,100.0,22.0,95.0


In [30]:
# Data type of each column.
df.dtypes

StudentID      int64
Name          object
Age            int64
Gender        object
Subject       object
Marks        float64
dtype: object

In [36]:
## Handling missing values

# One boolean per row: True when at least one column in that row is missing.
print(df.isna().any(axis="columns"))

0     False
1     False
2     False
3     False
4     False
5      True
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13    False
14     True
15    False
16    False
17    False
18    False
19    False
20    False
21    False
22     True
23    False
24    False
25    False
26    False
27    False
28     True
29    False
30    False
31    False
32    False
33    False
34    False
35     True
36    False
37    False
38    False
39    False
40    False
41    False
42     True
43    False
44    False
45    False
46    False
47    False
48    False
49    False
50    False
51     True
52    False
53    False
54    False
55    False
56    False
57    False
58     True
59    False
60    False
61    False
62    False
63    False
64    False
65    False
66    False
67     True
68    False
69    False
70    False
71    False
72    False
73    False
74     True
75    False
76    False
77    False
78    False
79    False
80    False
81     True
82    False
83  

In [39]:
# Number of missing values in each column.
df.isnull().sum()

StudentID     0
Name          0
Age           0
Gender        0
Subject       0
Marks        13
dtype: int64

In [None]:
# New frame with every missing value replaced by 0; df itself is not modified.
df_filled = df.fillna(0)

In [46]:
#### Fill missing Marks with the mean of the non-missing Marks
# The result goes into a new column, so the original Marks column keeps its NaNs.
df['Marks_fillNa'] = df['Marks'].fillna(df['Marks'].mean())

In [47]:
# Display the frame with the new Marks_fillNa column.
# NOTE(review): display.max_rows was set to None earlier, so this renders every
# row; df.head() would give a shorter preview.
df

Unnamed: 0,StudentID,Name,Age,Gender,Subject,Marks,Marks_fillNa
0,1,Alice Johnson,20,Female,Math,85.0,85.0
1,2,Bob Smith,21,Male,English,78.0,78.0
2,3,Charlie Lee,19,Male,Science,90.0,90.0
3,4,Daisy Evans,22,Female,History,88.0,88.0
4,5,Elijah Brown,20,Male,Math,92.0,92.0
5,6,Fiona Davis,21,Female,English,,85.977011
6,7,George Wilson,20,Male,Science,84.0,84.0
7,8,Hannah Moore,19,Female,History,79.0,79.0
8,9,Ian Taylor,22,Male,Math,95.0,95.0
9,10,Julia Martinez,21,Female,English,87.0,87.0


In [50]:
# Column data types, now including Marks_fillNa.
df.dtypes

StudentID         int64
Name             object
Age               int64
Gender           object
Subject          object
Marks           float64
Marks_fillNa    float64
dtype: object

In [52]:
### Renaming a column

# rename() returns a new frame, so the result is bound back to df.
df = df.rename({'Gender': 'Sex'}, axis='columns')
df

Unnamed: 0,StudentID,Name,Age,Sex,Subject,Marks,Marks_fillNa
0,1,Alice Johnson,20,Female,Math,85.0,85.0
1,2,Bob Smith,21,Male,English,78.0,78.0
2,3,Charlie Lee,19,Male,Science,90.0,90.0
3,4,Daisy Evans,22,Female,History,88.0,88.0
4,5,Elijah Brown,20,Male,Math,92.0,92.0
5,6,Fiona Davis,21,Female,English,,85.977011
6,7,George Wilson,20,Male,Science,84.0,84.0
7,8,Hannah Moore,19,Female,History,79.0,79.0
8,9,Ian Taylor,22,Male,Math,95.0,95.0
9,10,Julia Martinez,21,Female,English,87.0,87.0


In [54]:
## Change datatypes

# Fill any missing ages with the column mean, then cast to float. The null
# counts shown earlier report 0 missing Age values, so here the fill changes nothing.
df['Age_new'] = (
    df['Age']
    .fillna(df['Age'].mean())
    .astype('float64')
)
df.head(10)

Unnamed: 0,StudentID,Name,Age,Sex,Subject,Marks,Marks_fillNa,Age_new
0,1,Alice Johnson,20,Female,Math,85.0,85.0,20.0
1,2,Bob Smith,21,Male,English,78.0,78.0,21.0
2,3,Charlie Lee,19,Male,Science,90.0,90.0,19.0
3,4,Daisy Evans,22,Female,History,88.0,88.0,22.0
4,5,Elijah Brown,20,Male,Math,92.0,92.0,20.0
5,6,Fiona Davis,21,Female,English,,85.977011,21.0
6,7,George Wilson,20,Male,Science,84.0,84.0,20.0
7,8,Hannah Moore,19,Female,History,79.0,79.0,19.0
8,9,Ian Taylor,22,Male,Math,95.0,95.0,22.0
9,10,Julia Martinez,21,Female,English,87.0,87.0,21.0


In [55]:
# Apply a function to every value of a column: each age is doubled and the
# results are stored in a new 'Function' column.
df['Function'] = df['Age'].apply(lambda age: age * 2)

In [56]:
# Display the frame with the added 'Function' column.
df

Unnamed: 0,StudentID,Name,Age,Sex,Subject,Marks,Marks_fillNa,Age_new,Function
0,1,Alice Johnson,20,Female,Math,85.0,85.0,20.0,40
1,2,Bob Smith,21,Male,English,78.0,78.0,21.0,42
2,3,Charlie Lee,19,Male,Science,90.0,90.0,19.0,38
3,4,Daisy Evans,22,Female,History,88.0,88.0,22.0,44
4,5,Elijah Brown,20,Male,Math,92.0,92.0,20.0,40
5,6,Fiona Davis,21,Female,English,,85.977011,21.0,42
6,7,George Wilson,20,Male,Science,84.0,84.0,20.0,40
7,8,Hannah Moore,19,Female,History,79.0,79.0,19.0,38
8,9,Ian Taylor,22,Male,Math,95.0,95.0,22.0,44
9,10,Julia Martinez,21,Female,English,87.0,87.0,21.0,42
