## Numpy

Provides support for arrays and matrices, along with a large collection of mathematical functions.

In [11]:
import numpy as np


# Build a one-dimensional array from a plain Python list.
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1)

# A list of equal-length lists becomes a two-dimensional array.
arr2 = np.array([[1, 2, 3], [4, 5, 6]])
print(arr2)

# The shape is a tuple holding the length of every axis.
print(f"Shape of Array {arr1} is", np.shape(arr1))
print(f"Shape of Array {arr2} is", np.shape(arr2))



[1 2 3 4 5]
[[1 2 3]
 [4 5 6]]
Shape of Array [1 2 3 4 5] is (5,)
Shape of Array [[1 2 3]
 [4 5 6]] is (2, 3)


In [28]:
# Reshaping: reshape(i, j) lays the same elements out as i rows and j columns.
# Passing -1 for one axis lets NumPy infer that length from the element count.

arr3 = np.arange(1, 10)
print("Original Array:", arr3)
arr3 = arr3.reshape((3, 3))
print("Reshaped Array:", arr3)

# Even numbers 0..8 turned into a single column.
arr4 = np.arange(start=0, stop=10, step=2)
print("Original Array:", arr4)
print("Reshaped Array:", arr4.reshape(-1, 1))

# Nine values turned into a 9x1 column vector.
arr5 = np.arange(1, 10)
print("Original Array:", arr5)
print("Reshaped Array:", arr5.reshape(-1, 1))

# Flattening collapses a multi-dimensional array into one dimension.
arr6 = np.array([[1, 2, 3], [4, 5, 6]])
print("Original Array:", arr6)
print("Flattened Array:", arr6.flatten())

# Transposing swaps rows and columns: (2, 3) becomes (3, 2).
arr7 = np.array([[1, 2, 3], [4, 5, 6]])
print("Original Array:", arr7)
print("Transposed Array:", np.transpose(arr7))

Original Array: [1 2 3 4 5 6 7 8 9]
Reshaped Array: [[1 2 3]
 [4 5 6]
 [7 8 9]]
Original Array: [0 2 4 6 8]
Reshaped Array: [[0]
 [2]
 [4]
 [6]
 [8]]
Original Array: [1 2 3 4 5 6 7 8 9]
Reshaped Array: [[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]
 [9]]
Original Array: [[1 2 3]
 [4 5 6]]
Flattened Array: [1 2 3 4 5 6]
Original Array: [[1 2 3]
 [4 5 6]]
Transposed Array: [[1 4]
 [2 5]
 [3 6]]


In [32]:
# Element-wise square root of 0..9, displayed as a 2x5 grid.
print(np.sqrt(np.arange(10).reshape(2, 5)))

[[0.         1.         1.41421356 1.73205081 2.        ]
 [2.23606798 2.44948974 2.64575131 2.82842712 3.        ]]


In [35]:
# 3x4 grid holding the integers 0..11, used by the slicing examples.
arr8 = np.arange(12).reshape((3, 4))
print("Array: \n", arr8)

Array: 
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [47]:
# Single element at row 0, column 0 (one indexing step instead of two).
print(" : ", arr8[0, 0])
# Rows from 1 onward, columns from 2 onward.
print(" : ", arr8[1:, 2:])
# Row 1 only, column 2 only; slicing keeps the result two-dimensional.
print(" : ", arr8[1:2, 2:3])
# Rows 0 and 1, columns from 2 onward.
print(" : ", arr8[0:2, 2:])

 :  0
 :  [[ 6  7]
 [10 11]]
 :  [[6]]
 :  [[2 3]
 [6 7]]


### Statistical Concept

#### Normalization

In [None]:
# Standardise the data (z-score) so the result has mean 0 and standard deviation 1.

data = np.arange(5)

# Summary statistics of the sample.
mean = data.mean()
std_dev = data.std()

# z = (x - mean) / std, applied element-wise through broadcasting.
normalized_data = (data - mean) / std_dev
print("Normalized Data:", normalized_data)

Normalized Data: [-1.41421356 -0.70710678  0.          0.70710678  1.41421356]


#### Logical Operations

In [54]:
# Integers 1..10; a boolean mask keeps only the elements where the condition holds.
data = np.arange(1, 11)
data[data > 5]

array([ 6,  7,  8,  9, 10])

In [59]:
# Combine element-wise conditions with & (and) and | (or); each comparison
# needs its own parentheses because & and | bind tighter than comparisons.
print(data[(data > 5) & (data < 8)])

print(data[(data <= 3) | (data >= 8)])

[6 7]
[ 1  2  3  8  9 10]


## Pandas-DataFrame and Series

In [93]:
import pandas as pd

In [94]:
# Series built from a one-dimensional NumPy array.

data = np.arange(1, 5)
series = pd.Series(data=data)
print("Series\n", series)

Series
 0    1
1    2
2    3
3    4
dtype: int64


In [95]:
# Series built from a dictionary: the keys become the index labels.

data = dict(a=1, b=2, c=3, d=4)
series_dict = pd.Series(data=data)
print("Series from Dictionary\n", series_dict)

Series from Dictionary
 a    1
b    2
c    3
d    4
dtype: int64


In [96]:
# Series with explicit index labels supplied alongside the values.
data = [10, 20, 30]
index = ["a", "b", "c"]

pd.Series(data=data, index=index)

a    10
b    20
c    30
dtype: int64

In [97]:
# DataFrame from a dictionary of lists: each key is a column name and each
# list holds that column's values.

data = {
    "Name": ["Tom", "Jerry", "Mickey", "Donald"],
    "Age": [20, 21, 22, 23],
    "City": ["New York", "Los Angeles", "Chicago", "Houston"],
}

df = pd.DataFrame(data=data)
print("Dataframe from Dictionary of List\n", df)
print(type(df))

Dataframe from Dictionary of List
      Name  Age         City
0     Tom   20     New York
1   Jerry   21  Los Angeles
2  Mickey   22      Chicago
3  Donald   23      Houston
<class 'pandas.core.frame.DataFrame'>


In [98]:
# DataFrame from a list of dictionaries: each dictionary is one row and its
# keys are the column names.

data = [
    {"Name": "Tom", "Age": 20, "City": "New York"},
    {"Name": "Jerry", "Age": 21, "City": "Los Angeles"},
    {"Name": "Mickey", "Age": 22, "City": "Chicago"},
    {"Name": "Donald", "Age": 23, "City": "Houston"},
]

df = pd.DataFrame(data=data)
print("Dataframe from List of Dictionary\n", df)
print(type(df))

Dataframe from List of Dictionary
      Name  Age         City
0     Tom   20     New York
1   Jerry   21  Los Angeles
2  Mickey   22      Chicago
3  Donald   23      Houston
<class 'pandas.core.frame.DataFrame'>


In [99]:
# Load the sample CSV (path is relative to the notebook's working directory)
# and preview its first five rows.
df = pd.read_csv("example.csv")
df.head(n=5)

Unnamed: 0,name,age,city
0,Nilanjan,21,Kolkata
1,Vikram,22,Delhi
2,Rahul,23,Mumbai
3,Snigdha,24,Chennai
4,Tina,25,Kolkata


In [100]:
# Selecting one column by its label with square brackets.
df["name"]

0     Nilanjan
1       Vikram
2        Rahul
3      Snigdha
4         Tina
5        Jyoti
6        Pooja
7        Pinki
8         Esha
9          Raj
10      Trisha
11        Yash
12      Urzaan
13      Ishaan
14        Ojas
15      Pranav
16      Adarsh
17     Shubham
18     Darshan
19      Faisal
20     Gaikwad
21       Hetal
22       Janvi
23       Karan
24      Leslie
25        Zara
26        Xylo
27        Cara
28       Vicky
29      Bhavna
30        Neha
31        Mona
Name: name, dtype: object

In [101]:
# Check what kind of object a single-column selection gives back.
type(df["name"])

pandas.core.series.Series

In [102]:
df.loc[0] # label-based access: selects the row whose index label is 0

name    Nilanjan
age           21
city     Kolkata
Name: 0, dtype: object

In [103]:
df.iloc[0] # position-based access: selects the first row (integer position 0), not a column

name    Nilanjan
age           21
city     Kolkata
Name: 0, dtype: object

In [104]:
# Read a single cell by row label and column label with `.at`.

df.at[0, "name"]

'Nilanjan'

In [105]:
## Accessing a single element by integer position using iat

# iat takes (row position, column position); (0, 0) is the first row of the first column.
df.iat[0, 0]

'Nilanjan'

#### Data Manipulation with DataFrames

In [108]:
# Preview the first five rows (five is the default for head()).
df.head()

Unnamed: 0,name,age,city
0,Nilanjan,21,Kolkata
1,Vikram,22,Delhi
2,Rahul,23,Mumbai
3,Snigdha,24,Chennai
4,Tina,25,Kolkata


In [133]:
# Add a Salary column holding one random integer per row.
# The number of values is taken from len(df) rather than a hard-coded 32, so
# the assignment stays valid if example.csv has a different number of rows
# (a length mismatch raises ValueError).
# randint draws from [50000, 80000): the upper bound is exclusive. The draw is
# unseeded, so the values differ on every run.
df['Salary'] = np.random.randint(50000, 80000, size=len(df)).tolist()
df.head(5)

Unnamed: 0,name,age,city,Salary
0,Nilanjan,21,Kolkata,67825
1,Vikram,22,Delhi,74928
2,Rahul,23,Mumbai,63075
3,Snigdha,24,Chennai,64146
4,Tina,25,Kolkata,79871


In [None]:
# Remove the Salary column from the dataframe.

# drop() targets row labels unless told otherwise; the `columns=` keyword names
# a column to remove. inplace=True modifies df itself instead of returning a copy.
df.drop(columns='Salary', inplace=True)

In [None]:
# Increase every value in the age column by one.
# Re-running this cell adds one again each time.

df['age'] = df['age'].add(1)