# Methods for Pandas

### 1: Importing Data

In [1]:
import pandas as pd
# Load the ratings CSV into a DataFrame (the path is relative to the working directory).
df = pd.read_csv('cloths-rating.csv')
# Preview the first five rows.
df.head()

Unnamed: 0,ProductID,UserID,Rating,Text
0,777,AV1YnR7wglJLPUi8IJmi,4,Great taffy at a great price.
1,767,AVpfpK8KLJeJML43BCuD,4,Absolutely wonderful - silky and sexy and comf...
2,1080,AVqkIdntQMlgsOJE6fuB,5,Love this dress! it's sooo pretty.
3,1077,AVpfpK8KLJeJML43BCuD,3,I had such high hopes for this dress and reall...
4,1049,AVpfpK8KLJeJML43BCuD,5,"I love, love, love this jumpsuit. it's fun, fl..."


### 2: Inspecting Data

In [2]:
# Print a structural summary: index range, column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 634 entries, 0 to 633
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   ProductID  634 non-null    int64 
 1   UserID     634 non-null    object
 2   Rating     634 non-null    int64 
 3   Text       634 non-null    object
dtypes: int64(2), object(2)
memory usage: 19.9+ KB


In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,ProductID,Rating
count,634.0,634.0
mean,1681.768139,3.755521
std,2530.61481,1.603268
min,89.0,0.0
25%,697.0,2.0
50%,862.0,5.0
75%,1077.0,5.0
max,9696.0,5.0


### 3: Selection

In [4]:
# Passing a list of column names (note the double brackets) selects those columns as a DataFrame.
df[['ProductID']].head()

Unnamed: 0,ProductID
0,777
1,767
2,1080
3,1077
4,1049


### 4: Checking for Null Values in the DataFrame

In [5]:
# Element-wise null mask: True wherever a value is missing.
# Only the first five rows are displayed here; df.isna().sum() would count
# the missing values per column across the whole frame.
df.isna().head()

Unnamed: 0,ProductID,UserID,Rating,Text
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,False,False
4,False,False,False,False


### 5: Sorting

In [6]:
# Order the rows by the Rating column, highest rating first, and preview the top five.
df.sort_values(by='Rating', ascending=False).head()

Unnamed: 0,ProductID,UserID,Rating,Text
317,444,B075P2RKDG,5,Mysore Sandal is much ahead of any other sanda...
362,697,B078X5298B,5,Flavour is Excellent🤤🤤.Go for it🔥🔥
390,8001,B07H3V7LNR,5,Very good product
385,333,AVphgVaX1cnluZ0-DR74,5,"I have hyperpigmentation , have been using thi..."
384,333,B07FPSB76P,5,"Simply awesome...!Spacious, looks good, No iss..."


# Methods for Numpy

### 1: Creating array

In [7]:
import numpy as np

# Build a 1-D array from a flat Python list.
one_d_array = np.array([3, 5, 3, 4])
one_d_array

array([3, 5, 3, 4])

In [8]:
# Build a 2-D array from a list of equal-length rows (2 rows x 3 columns).
two_d_array = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
two_d_array

array([[1, 2, 3],
       [4, 5, 6]])

### 2: shape

In [9]:
# shape gives the size along each axis; for this 2-D array that is (rows, columns).
two_d_array.shape

(2, 3)

### 3: sort()

In [10]:
# ndarray.sort() reorders the array in place instead of producing a new one,
# so the array itself is displayed on the next line to show the sorted values.
one_d_array.sort()
one_d_array

array([3, 3, 4, 5])

In [11]:
# A 3-D array: 2 blocks, each holding 3 rows of 3 values, i.e. shape (2, 3, 3).
my_arr = np.array([
    [[1, 2, 3], [6, 2, 1], [7, 8, 3]],
    [[2, 3, 4], [6, 7, 8], [9, 4, 2]],
])
my_arr

array([[[1, 2, 3],
        [6, 2, 1],
        [7, 8, 3]],

       [[2, 3, 4],
        [6, 7, 8],
        [9, 4, 2]]])

### 4: Splitting array horizontally

In [12]:
# hsplit cuts along the second axis (axis=1): the (2, 3, 3) array is divided into
# three equal pieces of shape (2, 1, 3), returned as a list of arrays.
np.hsplit(my_arr, 3)

[array([[[1, 2, 3]],
 
        [[2, 3, 4]]]),
 array([[[6, 2, 1]],
 
        [[6, 7, 8]]]),
 array([[[7, 8, 3]],
 
        [[9, 4, 2]]])]

### 5: reshape and arange

In [13]:
# arange(6) yields the integers 0..5; reshape lays those six values out as 3 rows x 2 columns.
a = np.reshape(np.arange(6), (3, 2))
a

array([[0, 1],
       [2, 3],
       [4, 5]])

### 6: reshape(-1,1)

In [14]:
# -1 asks NumPy to infer that dimension from the array's size:
# six elements in a single column gives a (6, 1) column vector.
a.reshape(-1,1)

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5]])

# Prediction Model using Simple Linear Regression

In [19]:
import pandas as pd
# Load the per-capita income dataset (the path is relative to the working directory).
data = pd.read_csv('canada_per_capita_income.csv')
# Preview the first five rows: one row per year with that year's per-capita income in US$.
data.head()

Unnamed: 0,year,per capita income (US$)
0,1970,3399.299037
1,1971,3768.297935
2,1972,4251.175484
3,1973,4804.463248
4,1974,5576.514583


In [20]:
from sklearn import linear_model
# Create an unfitted LinearRegression estimator with its default settings.
reg = linear_model.LinearRegression()

In [24]:
# Fit income as a function of year.
# The features are passed as a one-column DataFrame (double brackets) because
# scikit-learn expects a 2-D feature matrix; the target is the income column.
reg.fit(data[['year']], data['per capita income (US$)'])

LinearRegression()

In [26]:
# Predict the per-capita income for the year 2020.
# The model was fitted on a DataFrame, so the query is passed as a DataFrame with the
# same 'year' column. A bare [[2020]] list makes recent scikit-learn versions warn
# that X does not have valid feature names; the predicted value is the same either way.
reg.predict(pd.DataFrame({'year': [2020]}))

array([41288.69409442])

In [27]:
# Fitted slope(s): one coefficient per feature, so a single value here for `year`.
reg.coef_

array([828.46507522])

In [28]:
# Fitted intercept: the constant term added to slope * year.
reg.intercept_

-1632210.7578554575

#### The linear regression equation is $y = mx + b$, where $m$ is the slope (`reg.coef_`) and $b$ is the intercept (`reg.intercept_`)

In [29]:
# Reproduce the 2020 prediction by hand with y = m*x + b.
# The slope is copied from the displayed value of reg.coef_ (shown to 8 decimal
# places), so the result agrees with reg.predict only to within about 1e-5.
slope = 828.46507522             # m, as displayed by reg.coef_
intercept = -1632210.7578554575  # b, as displayed by reg.intercept_
year = 2020                      # x, the year being predicted
slope * year + intercept

41288.694088942604