#### NumPy

In [1]:
import numpy as np


In [2]:
# Build a one-dimensional array from a Python list, then show the array,
# its Python type, its shape, and its number of dimensions (one per line).
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1, type(arr1), arr1.shape, arr1.ndim, sep="\n")

[1 2 3 4 5]
<class 'numpy.ndarray'>
(5,)
1


In [10]:
# Start from a flat array of six elements and show its shape.
arr2 = np.array([1, 2, 3, 4, 5, 6])
print(arr2.shape)

# Rearrange the same six values into 3 rows x 2 columns.
# The total number of elements must stay the same (6 == 3 * 2).
arr2 = np.reshape(arr2, (3, 2))
print(arr2, arr2.shape, sep="\n")

(6,)
[[1 2]
 [3 4]
 [5 6]]
(3, 2)


In [13]:
# Array metadata, one value per line:
#   dtype    - type of each element
#   size     - total number of elements
#   itemsize - size in bytes of a single element
print(arr2.dtype, arr2.size, arr2.itemsize, sep="\n")

int32
6
4


In [12]:
# Identity matrix: np.identity(n) and np.eye(n) both build the n x n
# identity matrix, so the same 3x3 matrix is printed twice.
print(np.identity(3), np.eye(3), sep="\n")

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


##### Vector Operations in NumPy

In [17]:
# Two equal-length vectors; every operation below is applied element by element.
arr1 = np.array([1, 2, 3, 4, 5])
arr2 = np.array([10, 20, 30, 40, 50])

# Element-wise addition (same as arr1 + arr2)
print("Addition: ", np.add(arr1, arr2))

# Element-wise subtraction (same as arr1 - arr2)
print("Subtraction: ", np.subtract(arr1, arr2))

# Element-wise multiplication (same as arr1 * arr2)
print("Multiplication: ", np.multiply(arr1, arr2))

# Element-wise true division (same as arr1 / arr2); the result is floating point
print("Division: ", np.divide(arr1, arr2))

Addition:  [11 22 33 44 55]
Subtraction:  [ -9 -18 -27 -36 -45]
Multiplication:  [ 10  40  90 160 250]
Division:  [0.1 0.1 0.1 0.1 0.1]


In [20]:
# Apply NumPy's element-wise math functions to arr1:
# square root, exponential (e**x), natural logarithm and sine.
for label, func in (
    ("Sq. root: ", np.sqrt),
    ("Exponent: ", np.exp),
    ("Log: ", np.log),
    ("Sin: ", np.sin),
):
    print(label, func(arr1))

Sq. root:  [1.         1.41421356 1.73205081 2.         2.23606798]
Exponent:  [  2.71828183   7.3890561   20.08553692  54.59815003 148.4131591 ]
Log:  [0.         0.69314718 1.09861229 1.38629436 1.60943791]
Sin:  [ 0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427]


In [22]:
# Slicing and indexing: build a 3x4 matrix holding the integers 1..12
# to use as the example for the slices below.
arr = np.reshape(np.arange(1, 13), (3, 4))
print(arr)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [23]:
# Every row from index 1 onward, keeping all columns
arr[1:, :]

array([[ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [24]:
# Rows from index 1 onward, restricted to columns from index 2 onward
arr[1:, 2:]

array([[ 7,  8],
       [11, 12]])

In [29]:
# Mean and standard deviation of the integers 1..10
data = np.arange(1, 11)
mean = data.mean()
std_dev = data.std()  # default ddof=0, i.e. the population standard deviation
print("Mean: ", mean)
print("Standard Deviation: ", std_dev)

# Normalize (standardize) the data: subtract the mean and divide by the
# standard deviation so the result has mean 0 and standard deviation 1.
normalized_data = (data - mean) / std_dev
print("Normalized Data: ", normalized_data)

Mean:  5.5
Standard Deviation:  2.8722813232690143
Normalized Data:  [-1.5666989  -1.21854359 -0.87038828 -0.52223297 -0.17407766  0.17407766
  0.52223297  0.87038828  1.21854359  1.5666989 ]


In [31]:
# Comparing an array with a scalar is evaluated element by element and
# yields a boolean array; print the data and the comparison result.
print(data, data > 5, sep="\n")

[ 1  2  3  4  5  6  7  8  9 10]
[False False False False False  True  True  True  True  True]


In [32]:
# Boolean-mask indexing: keep only the elements for which the condition is True
above_five = data > 5
print(data[above_five])

[ 6  7  8  9 10]


#### Pandas

Pandas Series: A one-dimensional labeled array capable of holding data of any type (integer, floating point, Python objects, strings, datetime objects, etc.)

Pandas DataFrame: A two-dimensional labeled data structure with columns of potentially different types. It is like a spreadsheet or SQL table, but more powerful and flexible.

In [33]:
import pandas as pd

In [34]:
# Creating a Series from a dictionary: the keys become the index labels
# and the values become the data.
d = dict(a=10, b=20, c=30)
series_dict = pd.Series(d)
print(series_dict)

a    10
b    20
c    30
dtype: int64


In [36]:
# Creating a Series with a custom index: each value in `data` is paired
# with the label at the same position in `index`.
data = [10, 20, 30, 40, 50]
index = list('abcde')
series_list = pd.Series(data=data, index=index)
print(series_list)

a    10
b    20
c    30
d    40
e    50
dtype: int64


In [38]:
# Creating a DataFrame from a dictionary of lists:
# each key becomes a column name and each list holds that column's values.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['New York', 'London', 'Paris'],
)

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,London
2,Charlie,35,Paris


In [39]:
# Show the Python type of the object built above
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [41]:
# Creating a DataFrame from a list of dictionaries:
# each dictionary is one row, and its keys are the column names.
data = [
    dict(Name='Alice', Age=25, City='New York'),
    dict(Name='Bob', Age=30, City='London'),
    dict(Name='Charlie', Age=35, City='Paris'),
]

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,London
2,Charlie,35,Paris


In [43]:
# Accessing a specific element:
# .at takes a row label and a column label and returns that single value.
print(df.at[1, 'Age'], df.at[2, 'Age'], sep="\n")

30
35


In [44]:
# .iat takes integer positions for both the row and the column
# (here: row position 2, column position 1).
df.iat[2, 1]

35

In [45]:
# Add a new column by assigning a list with one value per row
salaries = [50000, 60000, 70000]
df['Salary'] = salaries
df

Unnamed: 0,Name,Age,City,Salary
0,Alice,25,New York,50000
1,Bob,30,London,60000
2,Charlie,35,Paris,70000


In [46]:
# drop() returns a new DataFrame without the column; the result is only
# displayed here, it is not assigned back to df.
df.drop(columns='Salary')

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,London
2,Charlie,35,Paris


In [49]:
df  # still has the Salary column: the earlier drop() call's result was never assigned back to df

Unnamed: 0,Name,Age,City,Salary
0,Alice,25,New York,50000
1,Bob,30,London,60000
2,Charlie,35,Paris,70000


In [50]:
# Make the column removal persist by rebinding df to the result of drop().
# drop(..., inplace=True) would also work, but it modifies the existing
# object and returns None; reassignment keeps the operation explicit and
# chainable and is the generally recommended pandas style.
df = df.drop(columns='Salary')
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,London
2,Charlie,35,Paris
