### Supervised Learning
1. Regression Models
2. Classification Models

#### Scikit Learn
- Scikit Learn is one of the important library in python, it plays a key role in machine learning.
- Scikit learn contains Built-In machine learning algorithms, preprocessing algorithms, etc,..
- https://scikit-learn.org/stable/

### Regression Models

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.DataFrame({'HeadSize':[1,2,3,4,5],
                   'BrainWeight':[3,4,4.8,7,8.9]})
df

Unnamed: 0,HeadSize,BrainWeight
0,1,3.0
1,2,4.0
2,3,4.8
3,4,7.0
4,5,8.9


In [3]:
df.shape

(5, 2)

In [4]:
df.describe()

Unnamed: 0,HeadSize,BrainWeight
count,5.0,5.0
mean,3.0,5.54
std,1.581139,2.387048
min,1.0,3.0
25%,2.0,4.0
50%,3.0,4.8
75%,4.0,7.0
max,5.0,8.9


In [7]:
X = df['HeadSize'] # Input
Y = df['BrainWeight'] # Taget

### Linear Regression Model

#### Line Equation Formula
- Y  = m * X + C
- Y = b0 + b1 * X
    - b0 is an Y-Intercept
    - b1 is Slope

### Slope Formula (b1)

In [6]:
##          (Xi - mean(X)) * (Yi - mean(Y))
##  b1 = -------------------------------------
##              (Xi - mean(X))**2

In [11]:
n = len(X)
x_mean = np.mean(X)
y_mean = np.mean(Y)

In [12]:
numerator = 0
denominator = 0
for i in range(n):
    numerator = numerator + (X[i] - x_mean) * (Y[i] - y_mean)
    denominator = denominator + (X[i] - x_mean)**2
    
b1 = numerator / denominator

In [13]:
print(b1) ## Slope = 1.48

1.48


### Y - Intercept (b0)
- b0 = mean(Y) - (b1 * mean(X))

In [14]:
b0 = y_mean - (b1 * x_mean)

In [15]:
print(b0)

1.1000000000000014


In [16]:
### predict the values

for i in range(n):
    y_prediction = b0 + b1 * X[i]
    print(y_prediction)

2.5800000000000014
4.060000000000001
5.540000000000001
7.020000000000001
8.500000000000002


### Apply Linear Regression Model

In [17]:
df

Unnamed: 0,HeadSize,BrainWeight
0,1,3.0
1,2,4.0
2,3,4.8
3,4,7.0
4,5,8.9


In [35]:
X1 = df[['HeadSize']].values # Input
Y1 = df[['BrainWeight']].values # Target

In [41]:
X1

array([[1],
       [2],
       [3],
       [4],
       [5]], dtype=int64)

In [37]:
## Step1 = Split the data into input and target
## Step2 = Import model
## Step3 = Split dataset for training and testing
## Step4 = Assign model to any object
## Step5 = Fit the model
## Step6 = Predict the model and find accuracy

In [38]:
from sklearn.linear_model import LinearRegression

In [39]:
lin_reg = LinearRegression()

In [40]:
lin_reg.fit(X1,Y1) # By fitting the model, formula will be applied to our dataset

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [42]:
y_prediction2 = lin_reg.predict(X1)
y_prediction2

array([[2.58],
       [4.06],
       [5.54],
       [7.02],
       [8.5 ]])

In [45]:
lin_reg.predict([[2.5],[3]])

array([[4.8 ],
       [5.54]])

In [47]:
df2 = pd.read_csv('https://raw.githubusercontent.com/nagamounika5/Datasets/master/headbrain.csv')
df2.head()

Unnamed: 0,Gender,Age Range,Head Size(cm^3),Brain Weight(grams)
0,1,1,4512,1530
1,1,1,3738,1297
2,1,1,4261,1335
3,1,1,3777,1282
4,1,1,4177,1590


In [48]:
# men --> 4000 --> 1500
# women -->4000 --> 1450

In [49]:
df2.isnull().sum()

Gender                 0
Age Range              0
Head Size(cm^3)        0
Brain Weight(grams)    0
dtype: int64

In [51]:
df2.shape

(237, 4)

In [69]:
X = df2[['Gender','Age Range','Head Size(cm^3)']]
Y = df2[['Brain Weight(grams)']]

In [70]:
X.head()

Unnamed: 0,Gender,Age Range,Head Size(cm^3)
0,1,1,4512
1,1,1,3738
2,1,1,4261
3,1,1,3777
4,1,1,4177


In [71]:
Y[0:5]

Unnamed: 0,Brain Weight(grams)
0,1530
1,1297
2,1335
3,1282
4,1590


In [72]:
from sklearn.linear_model import LinearRegression

In [73]:
lin_reg2 = LinearRegression()

In [74]:
lin_reg2.fit(X,Y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [75]:
y_prediction3 = lin_reg2.predict(X)
y_prediction3

array([[1519.93452386],
       [1330.91463003],
       [1458.63737483],
       [1340.43888825],
       [1438.1235879 ],
       [1293.55023241],
       [1342.39258224],
       [1287.20072693],
       [1300.38816139],
       [1390.50229682],
       [1258.87216403],
       [1393.18862606],
       [1306.98187861],
       [1445.69415212],
       [1353.87053445],
       [1364.61585141],
       [1272.05959849],
       [1264.48903426],
       [1173.88647533],
       [1498.44388994],
       [1365.10427491],
       [1406.13184876],
       [1347.03260547],
       [1324.07670105],
       [1577.32428491],
       [1498.19967819],
       [1403.68973127],
       [1400.27076679],
       [1261.55849328],
       [1437.6351644 ],
       [1342.88100574],
       [1345.07891148],
       [1420.05191847],
       [1434.21619992],
       [1433.48356467],
       [1349.47472296],
       [1279.38595096],
       [1333.35674752],
       [1232.25308338],
       [1270.83853974],
       [1385.61806184],
       [1274.013

In [76]:
y_prediction3[:5]

array([[1519.93452386],
       [1330.91463003],
       [1458.63737483],
       [1340.43888825],
       [1438.1235879 ]])

In [77]:
X.head()

Unnamed: 0,Gender,Age Range,Head Size(cm^3)
0,1,1,4512
1,1,1,3738
2,1,1,4261
3,1,1,3777
4,1,1,4177


In [78]:
Y[:5]

Unnamed: 0,Brain Weight(grams)
0,1530
1,1297
2,1335
3,1282
4,1590


In [79]:
lin_reg2.predict([[1,1,4512]])

array([[1519.93452386]])