In [1]:
# Let's start by loading the diabetes dataset from sklearn, then convert it to a pandas DataFrame for exploratory data analysis (EDA).

import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes
import matplotlib.pyplot as plt
import seaborn as sns

# Load the diabetes dataset bundled with scikit-learn. The returned object
# exposes the feature matrix as `diabetes.data`, the target vector as
# `diabetes.target` and the column names as `diabetes.feature_names`.
diabetes = load_diabetes()

In [2]:
# Inspect the raw loaded object; its repr echoes the 'data' and 'target' arrays.
# NOTE(review): this dumps a very large repr into the notebook — listing the
# available keys (e.g. `diabetes.keys()`) would probably be enough; confirm.
diabetes

{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990749, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06833155, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286131, -0.02593034],
        ...,
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04688253,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452873, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00422151,  0.00306441]]),
 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
        259.,  53., 190., 142.,  75., 142., 155., 225.,  59

In [3]:
# Build the feature frame from the raw arrays and append the regression
# target as its last column.
df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names).assign(
    target=diabetes.target
)

In [4]:
# Preview the assembled frame: ten feature columns followed by 'target'.
df

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485,104.0
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491,132.0
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930,220.0


In [6]:
# Dimensions of the frame as (rows, columns).
df.shape

(442, 11)

In [7]:
# Count the missing values in every column.
df.isna().sum()

age       0
sex       0
bmi       0
bp        0
s1        0
s2        0
s3        0
s4        0
s5        0
s6        0
target    0
dtype: int64

In [9]:
# Summary statistics, transposed so that each column of `df` becomes a row.
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
age,442.0,-2.511817e-19,0.047619,-0.107226,-0.037299,0.005383,0.038076,0.110727
sex,442.0,1.23079e-17,0.047619,-0.044642,-0.044642,-0.044642,0.05068,0.05068
bmi,442.0,-2.245564e-16,0.047619,-0.090275,-0.034229,-0.007284,0.031248,0.170555
bp,442.0,-4.79757e-17,0.047619,-0.112399,-0.036656,-0.00567,0.035644,0.132044
s1,442.0,-1.3814990000000001e-17,0.047619,-0.126781,-0.034248,-0.004321,0.028358,0.153914
s2,442.0,3.9184340000000004e-17,0.047619,-0.115613,-0.030358,-0.003819,0.029844,0.198788
s3,442.0,-5.777179e-18,0.047619,-0.102307,-0.035117,-0.006584,0.029312,0.181179
s4,442.0,-9.04254e-18,0.047619,-0.076395,-0.039493,-0.002592,0.034309,0.185234
s5,442.0,9.293722000000001e-17,0.047619,-0.126097,-0.033246,-0.001947,0.032432,0.133597
s6,442.0,1.130318e-17,0.047619,-0.137767,-0.033179,-0.001078,0.027917,0.135612


In [14]:
# Feature matrix: every column of `df` except the target.
X = df.drop(columns="target")

In [15]:
# Preview the feature matrix (the 'target' column is no longer present).
X

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641
...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930


In [16]:
# Target vector: the 'target' column of `df` as a Series.
y = df["target"]

In [17]:
# Preview the target Series.
y

0      151.0
1       75.0
2      141.0
3      206.0
4      135.0
       ...  
437    178.0
438    104.0
439    132.0
440    220.0
441     57.0
Name: target, Length: 442, dtype: float64

In [10]:
from sklearn.model_selection import train_test_split

In [18]:
# Hold out 25% of the rows as a test set; the fixed random_state makes the
# shuffle (and therefore the split) repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=45,
)

In [19]:
# Training features; the row labels are the original (shuffled) index values.
x_train

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
179,-0.023677,-0.044642,-0.015906,-0.012556,0.020446,0.041274,-0.043401,0.034309,0.014074,-0.009362
300,0.016281,-0.044642,0.073552,-0.041235,-0.004321,-0.013527,-0.013948,-0.001116,0.042897,0.044485
39,-0.001882,0.050680,0.014272,-0.074527,0.002559,0.006202,-0.013948,-0.002592,0.019196,0.003064
405,0.048974,0.050680,0.123131,0.083844,-0.104765,-0.100895,-0.069172,-0.002592,0.036644,-0.030072
96,0.056239,0.050680,0.009961,0.049415,-0.004321,-0.012274,-0.043401,0.034309,0.060791,0.032059
...,...,...,...,...,...,...,...,...,...,...
95,-0.070900,-0.044642,-0.057941,-0.081413,-0.045599,-0.028871,-0.043401,-0.002592,0.001148,-0.005220
32,0.034443,0.050680,0.125287,0.028758,-0.053855,-0.012900,-0.102307,0.108111,0.000272,0.027917
380,0.016281,-0.044642,0.026128,0.058608,-0.060735,-0.044215,-0.013948,-0.033958,-0.051404,-0.025930
131,-0.096328,-0.044642,-0.069797,-0.067642,-0.019456,-0.010708,0.015505,-0.039493,-0.046883,-0.079778


In [20]:
# Held-out test features; the row labels are the original index values.
x_test

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
208,0.030811,-0.044642,0.040140,0.076958,0.017694,0.037830,-0.028674,0.034309,-0.001496,0.119043
402,0.110727,0.050680,-0.033151,-0.022885,-0.004321,0.020293,-0.061809,0.071210,0.015568,0.044485
128,-0.052738,-0.044642,-0.062252,-0.026328,-0.005697,-0.005072,0.030232,-0.039493,-0.030748,-0.071494
285,0.012648,-0.044642,-0.020218,-0.015999,0.012191,0.021233,-0.076536,0.108111,0.059879,-0.021788
317,0.019913,-0.044642,-0.034229,0.055165,0.067229,0.074155,-0.006584,0.032833,0.024730,0.069338
...,...,...,...,...,...,...,...,...,...,...
298,0.023546,0.050680,-0.037463,-0.046985,-0.091006,-0.075530,-0.032356,-0.039493,-0.030748,-0.013504
104,-0.027310,-0.044642,0.064930,-0.002228,-0.024960,-0.017284,0.022869,-0.039493,-0.061176,-0.063209
118,-0.056370,0.050680,-0.010517,0.025315,0.023198,0.040022,-0.039719,0.034309,0.020609,0.056912
296,0.067136,-0.044642,-0.061174,-0.040099,-0.026336,-0.024487,0.033914,-0.039493,-0.056153,-0.059067


In [21]:
# Training targets, indexed like x_train.
y_train

179    151.0
300    275.0
39      90.0
405    281.0
96     150.0
       ...  
95     162.0
32     341.0
380     52.0
131    158.0
414    131.0
Name: target, Length: 331, dtype: float64

In [22]:
# Held-out test targets, indexed like x_test.
y_test

208    155.0
402    168.0
128    115.0
285    233.0
317    190.0
       ...  
298    129.0
104     95.0
118    179.0
296     89.0
289     68.0
Name: target, Length: 111, dtype: float64

In [23]:
from sklearn.preprocessing import StandardScaler

In [24]:
# Standardizer with default settings; it is fitted on the training features
# in the next step.
scale = StandardScaler()

In [25]:
# Learn the scaling parameters from the training features only and apply
# them in the same call; the result is a NumPy array, not a DataFrame.
x_train_scale= scale.fit_transform(x_train)

In [26]:
# Apply the already-fitted scaler to the test features. transform() (not
# fit_transform) is used so the test split does not influence the scaling.
x_test_scale = scale.transform(x_test)

In [27]:
# Inspect the scaled training features.
x_train_scale

array([[-0.54135807, -0.91034569, -0.35743752, ...,  0.77729475,
         0.31055913, -0.21105901],
       [ 0.30934747, -0.91034569,  1.49083673, ...,  0.00808522,
         0.91223292,  0.88711141],
       [-0.07733687,  1.0984838 ,  0.26607668, ..., -0.02396518,
         0.41749799,  0.04236493],
       ...,
       [ 0.30934747, -0.91034569,  0.51102869, ..., -0.70503611,
        -1.05624125, -0.54895761],
       [-2.0880954 , -0.91034569, -1.47085575, ..., -0.8252251 ,
        -0.96186105, -1.64712803],
       [ 1.70141106,  1.0984838 ,  0.11019813, ...,  2.02726023,
         1.15902953,  0.12683958]])

In [28]:
from sklearn.linear_model import LinearRegression

In [29]:
# Linear regression estimator with default settings.
lr = LinearRegression()

In [31]:
# Fit on the scaled training features; the fitted estimator is bound back
# to `lr` and used for scoring and prediction below.
lr =lr.fit(x_train_scale, y_train)

In [32]:
# Score on the held-out test split. The value equals the r2_score computed
# from the predictions further down, i.e. this is the test-set R².
lr.score(x_test_scale,y_test)

0.5314985405152473

In [34]:
# Fitted intercept term of the linear model.
lr.intercept_

152.08761329305136

In [35]:
# Fitted coefficients: ten values, one per feature column of X. They apply
# to the standardized features, not to the raw columns.
lr.coef_

array([  0.85568579, -10.11604267,  24.37491242,  16.29011203,
       -43.00395477,  24.98780221,   8.88356765,  11.87576195,
        37.28288962,   2.80981855])

In [37]:
# Predict targets for the scaled test features.
y_predict = lr.predict(x_test_scale)

In [40]:
# Sanity check: one prediction per test row.
y_predict.shape

(111,)

In [39]:
# Show the true test targets again, for comparison with the predictions.
y_test

208    155.0
402    168.0
128    115.0
285    233.0
317    190.0
       ...  
298    129.0
104     95.0
118    179.0
296     89.0
289     68.0
Name: target, Length: 111, dtype: float64

In [41]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [47]:
# Mean squared error between the true test targets and the predictions.
MSE = mean_squared_error(y_true=y_test, y_pred=y_predict)

In [48]:
# Mean absolute error between the true test targets and the predictions.
MAE = mean_absolute_error(y_true=y_test, y_pred=y_predict)

In [50]:
# Root mean squared error: the square root of MSE, which puts the error
# back in the units of the target.
RMSE = np.sqrt(MSE)

In [52]:
# Coefficient of determination (R²) of the predictions on the test split.
R2 = r2_score(y_true=y_test, y_pred=y_predict)

In [64]:
# Display R²; it matches the value returned by lr.score on the same split.
R2

0.5314985405152473

In [53]:
from sklearn.linear_model import Lasso, Ridge, ElasticNet

In [54]:
# Lasso (L1-regularised linear regression) with default hyperparameters.
lasso = Lasso()

In [56]:
# Fit on the same scaled training data as the plain linear model.
lasso= lasso.fit(x_train_scale,y_train)

In [57]:
# Test-set score of the Lasso model, comparable with lr.score above.
lasso.score(x_test_scale,y_test)

0.5325050961687001

In [58]:
# Ridge (L2-regularised linear regression) with default hyperparameters.
ridge = Ridge()

In [59]:
# Fit on the same scaled training data as the other models.
ridge =ridge.fit(x_train_scale,y_train)

In [60]:
# Test-set score of the Ridge model.
ridge.score(x_test_scale,y_test)

0.5324449834289111

In [61]:
# Elastic net (combined L1/L2 penalty) with default hyperparameters.
EN = ElasticNet()

In [62]:
# Fit on the same scaled training data as the other models.
EN = EN.fit(x_train_scale,y_train)

In [63]:
# Test-set score of the elastic-net model; with default settings it is the
# lowest of the four models tried on this split.
EN.score(x_test_scale,y_test)

0.4954450187458871

In [66]:
# Load the advertising dataset (columns TV, Radio, Newspaper and Sales).
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# cell fails anywhere else — consider a path relative to a configurable
# data directory.
# NOTE(review): the file carries an .xls extension but is parsed with
# read_csv, so it is presumably plain CSV text — confirm and rename.
data = pd.read_csv("F:\\0. The Data Psychology\\4. New_Machine Learning\\1. Linear Regression\\Advertising.xls")

In [67]:
# Column names, dtypes and non-null counts of the advertising frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   Radio      200 non-null    float64
 2   Newspaper  200 non-null    float64
 3   Sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB


In [68]:
# Count the missing values in every column.
data.isna().sum()

TV           0
Radio        0
Newspaper    0
Sales        0
dtype: int64

In [70]:
# Select the predictors by name rather than by position, so a change in the
# file's column order cannot silently swap a feature for the Sales target.
# This rebinds `X`, replacing the diabetes feature matrix defined earlier.
X = data[["TV", "Radio", "Newspaper"]]

In [72]:
# Regression target: the 'Sales' column (rebinds `y` from the diabetes run).
y = data["Sales"]

In [73]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable. This overwrites the diabetes train/test variables.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=40,
)

In [74]:
# Shape of the training features as (rows, feature columns).
x_train.shape

(160, 3)

In [76]:
# Shape of the test features as (rows, feature columns).
x_test.shape

(40, 3)

In [77]:
# Number of training targets; should equal the number of rows in x_train.
y_train.shape

(160,)

In [78]:
# Number of test targets; should equal the number of rows in x_test.
y_test.shape

(40,)

In [79]:
# Fresh standardizer for the advertising features. This rebinds `scale`, so
# the scaler fitted on the diabetes data is no longer reachable by this name.
scale = StandardScaler()

In [81]:
# Fit the scaler on the training features only and transform them.
x_train_scl =scale.fit_transform(x_train)

In [82]:
# Transform the test features with the scaler fitted on the training split.
x_test_scl = scale.transform(x_test)

In [84]:
# Separate linear regression estimator for the advertising data (the
# diabetes model stays bound to `lr`).
LR = LinearRegression()

In [85]:
# Fit the model in place on the scaled training features.
LR.fit(x_train_scl,y_train)

In [86]:
# Test-set score; it equals the r2_score computed from y_pred below.
LR.score(x_test_scl,y_test)

0.8477283412895041

In [89]:
# Predict Sales for the scaled test features.
y_pred = LR.predict(x_test_scl)

In [90]:
# Mean squared error of the predictions on the test split.
mean_squared_error(y_test,y_pred)

4.332995687070389

In [91]:
# R² of the predictions on the test split (same value as LR.score above).
r2_score(y_test,y_pred)

0.8477283412895041

In [93]:
# Load the dress attributes dataset used for the encoding examples.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# cell fails anywhere else — consider a path relative to a configurable
# data directory.
df1 = pd.read_csv("F:\\Dataset\\dress.csv")

In [94]:
# Column names, dtypes and non-null counts; several of the object columns
# (including Price) contain missing values.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Dress_ID        500 non-null    int64  
 1   Style           500 non-null    object 
 2   Price           498 non-null    object 
 3   Rating          500 non-null    float64
 4   Size            500 non-null    object 
 5   Season          498 non-null    object 
 6   NeckLine        497 non-null    object 
 7   SleeveLength    498 non-null    object 
 8   waiseline       413 non-null    object 
 9   Material        372 non-null    object 
 10  FabricType      234 non-null    object 
 11  Decoration      264 non-null    object 
 12  Pattern Type    391 non-null    object 
 13  Recommendation  500 non-null    int64  
dtypes: float64(1), int64(2), object(11)
memory usage: 54.8+ KB


In [107]:
# Distinct Price labels. The same level appears under different spellings
# ('Low'/'low', 'High'/'high'), and NaN is present as well.
df1["Price"].unique()

array(['Low', 'High', 'Average', 'Medium', 'very-high', 'low', 'high',
       nan], dtype=object)

In [96]:
from sklearn.preprocessing import LabelEncoder

In [97]:
# Label encoder that maps each distinct Price label to an integer code.
lb = LabelEncoder()

In [102]:
# Fit the encoder on the Price column and encode it in a single step.
# `lb` must be fitted before transform() can be called; fit_transform does
# both, so this cell also works on a freshly restarted kernel.
# Codes follow the sorted order of the labels, so differently-cased
# spellings such as 'Low' and 'low' receive different codes, and the
# missing value gets a code of its own.
df1["Price_lb"] = lb.fit_transform(df1["Price"])

In [108]:
# Distinct integer codes produced by the label encoder: one per Price label,
# with the missing value counted as a label too.
df1["Price_lb"].unique()

array([2, 1, 0, 3, 6, 5, 4, 7])

In [109]:
# Preview the frame with the new 'Price_lb' column appended.
df1

Unnamed: 0,Dress_ID,Style,Price,Rating,Size,Season,NeckLine,SleeveLength,waiseline,Material,FabricType,Decoration,Pattern Type,Recommendation,Price_lb
0,1006032852,Sexy,Low,4.6,M,Summer,o-neck,sleevless,empire,,chiffon,ruffles,animal,1,2
1,1212192089,Casual,Low,0.0,L,Summer,o-neck,Petal,natural,microfiber,,ruffles,animal,0,2
2,1190380701,vintage,High,0.0,L,Automn,o-neck,full,natural,polyster,,,print,0,1
3,966005983,Brief,Average,4.6,L,Spring,o-neck,full,natural,silk,chiffon,embroidary,print,1,0
4,876339541,cute,Low,4.5,M,Summer,o-neck,butterfly,natural,chiffonfabric,chiffon,bow,dot,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,713391965,Casual,Low,4.7,M,Spring,o-neck,full,natural,polyster,,,solid,1,2
496,722565148,Sexy,Low,4.3,free,Summer,o-neck,full,empire,cotton,,,,0,2
497,532874347,Casual,Average,4.7,M,Summer,v-neck,full,empire,cotton,,lace,solid,1,0
498,655464934,Casual,Average,4.6,L,winter,boat-neck,sleevless,empire,silk,broadcloth,applique,print,1,0


In [119]:
from sklearn.preprocessing import OneHotEncoder

In [120]:
# One-hot encoder with default settings (its transform output is sparse).
One = OneHotEncoder()

In [123]:
# Learn the Price categories. Double brackets pass a one-column DataFrame,
# i.e. the 2-D input the encoder expects.
One.fit(df1[["Price"]])

In [127]:
# Encode Price; the result is a sparse matrix with one row per dress and one
# column per learned category.
One.transform(df1[["Price"]])

<500x8 sparse matrix of type '<class 'numpy.float64'>'
	with 500 stored elements in Compressed Sparse Row format>

In [118]:
# Count the rows in which Price is missing.
df1["Price"].isna().sum()

2

In [126]:
# Display the frame again; the one-hot encoding above did not modify it.
df1

Unnamed: 0,Dress_ID,Style,Price,Rating,Size,Season,NeckLine,SleeveLength,waiseline,Material,FabricType,Decoration,Pattern Type,Recommendation,Price_lb
0,1006032852,Sexy,Low,4.6,M,Summer,o-neck,sleevless,empire,,chiffon,ruffles,animal,1,2
1,1212192089,Casual,Low,0.0,L,Summer,o-neck,Petal,natural,microfiber,,ruffles,animal,0,2
2,1190380701,vintage,High,0.0,L,Automn,o-neck,full,natural,polyster,,,print,0,1
3,966005983,Brief,Average,4.6,L,Spring,o-neck,full,natural,silk,chiffon,embroidary,print,1,0
4,876339541,cute,Low,4.5,M,Summer,o-neck,butterfly,natural,chiffonfabric,chiffon,bow,dot,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,713391965,Casual,Low,4.7,M,Spring,o-neck,full,natural,polyster,,,solid,1,2
496,722565148,Sexy,Low,4.3,free,Summer,o-neck,full,empire,cotton,,,,0,2
497,532874347,Casual,Average,4.7,M,Summer,v-neck,full,empire,cotton,,lace,solid,1,0
498,655464934,Casual,Average,4.6,L,winter,boat-neck,sleevless,empire,silk,broadcloth,applique,print,1,0


In [128]:
# Encode Price once more and convert the sparse result to a dense NumPy array.
encoded = One.transform(df1[["Price"]]).toarray()

In [129]:
# Inspect the dense one-hot matrix (one row per dress, a single 1 per row).
encoded

array([[0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.]])