**Linear Regression Model**

In [29]:
import pandas as pd

df = pd.read_csv('/content/insurance.csv')
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,expenses
0,19,female,27.9,0,yes,southwest,16884.92
1,18,male,33.8,1,no,southeast,1725.55
2,28,male,33.0,3,no,southeast,4449.46
3,33,male,22.7,0,no,northwest,21984.47
4,32,male,28.9,0,no,northwest,3866.86


LABEL ENCODER

In [32]:
from sklearn.preprocessing import LabelEncoder

le  = LabelEncoder()
df['sex'] = le.fit_transform(df['sex'])
df['smoker'] = le.fit_transform(df['smoker'])
df['region'] = le.fit_transform(df['region'])

In [33]:
X = df[['age', 'sex', 'bmi', 'children', 'smoker', 'region']]
y = df['expenses']

In [34]:
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X, y)

y_pred = model.predict(X)

In [35]:
from sklearn.metrics import mean_squared_error, r2_score

mse = mean_squared_error(y, y_pred)
r2 = r2_score(y, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R2 Score: {r2*100} %")

Mean Squared Error: 36525536.8689183
R2 Score: 75.07516902763372 %


Applying TRAIN TEST MODEL

In [40]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


df = pd.read_csv("/content/insurance.csv")


label_encoder = LabelEncoder()
df['sex'] = label_encoder.fit_transform(df['sex'])
df['smoker'] = label_encoder.fit_transform(df['smoker'])
df['region'] = label_encoder.fit_transform(df['region'])

X = df[['age', 'sex', 'bmi', 'children', 'smoker', 'region']]
y = df['expenses']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)


mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R2 Score: {r2*100} %")

Mean Squared Error: 33639075.08997808
R2 Score: 78.33214205203846 %


NON LINEAR

In [42]:
from sklearn.preprocessing import PolynomialFeatures
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler


df = pd.read_csv("/content/insurance.csv")

label_encoder = LabelEncoder()
df['sex'] = label_encoder.fit_transform(df['sex'])
df['smoker'] = label_encoder.fit_transform(df['smoker'])
df['region'] = label_encoder.fit_transform(df['region'])

Xdf = df[['age', 'sex', 'bmi', 'children', 'smoker', 'region']]
y = df['expenses']

scaler = StandardScaler()
X = scaler.fit_transform(Xdf)

poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

model = LinearRegression()
model.fit(X_poly, y)


y_pred = model.predict(X_poly)
mse = mean_squared_error(y, y_pred)

print(f"Mean Squared Error: {mse}\n")
print(f"R2 Score: {r2_score(y, y_pred)}")

Mean Squared Error: 22464467.27266702

R2 Score: 0.8467036770287829


With train test model

In [43]:
from sklearn.preprocessing import PolynomialFeatures
from matplotlib import pyplot as plt


label_encoder = LabelEncoder()
df['sex'] = label_encoder.fit_transform(df['sex'])
df['smoker'] = label_encoder.fit_transform(df['smoker'])
df['region'] = label_encoder.fit_transform(df['region'])

Xdf = df[['age', 'sex', 'bmi', 'children', 'smoker', 'region']]
y = df['expenses']

scaler = StandardScaler()
X = scaler.fit_transform(Xdf)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X_train)

model = LinearRegression()
model.fit(X_poly, y_train)


y_pred = model.predict(poly.transform(X_test))
mse = mean_squared_error(y_test, y_pred)

print(f"Mean Squared Error: {mse}\n")
print(f"R2 Score: {r2_score(y_test, y_pred)}")

Mean Squared Error: 19969083.52201677

R2 Score: 0.8638078252045538


KNN Implementing

In [44]:
import pandas as pd

df = pd.read_csv('/content/insurance.csv')
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,expenses
0,19,female,27.9,0,yes,southwest,16884.92
1,18,male,33.8,1,no,southeast,1725.55
2,28,male,33.0,3,no,southeast,4449.46
3,33,male,22.7,0,no,northwest,21984.47
4,32,male,28.9,0,no,northwest,3866.86


In [45]:
from sklearn.preprocessing import LabelEncoder

le  = LabelEncoder()
df['sex'] = le.fit_transform(df['sex'])
df['smoker'] = le.fit_transform(df['smoker'])
df['region'] = le.fit_transform(df['region'])

In [46]:
X = df[['age', 'sex', 'bmi', 'children', 'smoker', 'region']]
y = df['expenses']

In [49]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

k_values = [i for i in range(1, 50) if i % 2 != 0]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,random_state=42)

for k in k_values:
    knn = KNeighborsRegressor(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    print(f"For K value: {k} :")
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"Mean Squared Error: {mse}")
    print(f"R2 Score: {r2*100} % \n")

For K value: 1 :
Mean Squared Error: 191649563.6301438
R2 Score: -30.70790575145761 % 

For K value: 3 :
Mean Squared Error: 131917074.44206479
R2 Score: 10.030556779888233 % 

For K value: 5 :
Mean Squared Error: 127959940.08767258
R2 Score: 12.729382357377828 % 

For K value: 7 :
Mean Squared Error: 126414510.33128445
R2 Score: 13.7833888634052 % 

For K value: 9 :
Mean Squared Error: 126071923.6452166
R2 Score: 14.017038173250096 % 

For K value: 11 :
Mean Squared Error: 124823077.99520057
R2 Score: 14.868769825691686 % 

For K value: 13 :
Mean Squared Error: 123199694.2663962
R2 Score: 15.975942121854636 % 

For K value: 15 :
Mean Squared Error: 123367215.55731812
R2 Score: 15.861690063616496 % 

For K value: 17 :
Mean Squared Error: 124771273.55663952
R2 Score: 14.904101237591028 % 

For K value: 19 :
Mean Squared Error: 125586661.45662905
R2 Score: 14.34799433724666 % 

For K value: 21 :
Mean Squared Error: 125630870.37226632
R2 Score: 14.317843187048151 % 

For K value: 23 :
Mea

KNN mplementing in Scaled data form

In [53]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler



df = pd.read_csv("/content/insurance.csv")


le = LabelEncoder()
df['sex'] = le.fit_transform(df['sex'])
df['smoker'] = le.fit_transform(df['smoker'])
df['region'] = le.fit_transform(df['region'])

Xdf = df[['age', 'sex', 'bmi', 'children', 'smoker', 'region']]
y = df['expenses']

scaler = StandardScaler()
X = scaler.fit_transform(Xdf)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

k_values = [i for i in range(1, 50) if i % 2 != 0]
best_mse = float('inf')
best_r2=0
best_k=0


for k in k_values:
    knn = KNeighborsRegressor(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    print(f"For K value: {k} :")

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"Mean Squared Error: {mse}")
    print(f"R2 Score: {r2*100} % \n")

    if mse < best_mse:
      best_mse = mse
      best_r2 = r2
      best_k=k

print("\nFinal Results:")
print(f"For K = {best_k}, KNN Regression achieved an MSE of {best_mse:.2f}, which is {(36525536.8689183 - best_mse) / 36525536.8689183 * 100:.2f}% lower than the MSE by Linear Regression.")
print(f"It also achieved an R2 score of {best_r2:.2f}, which is {(best_r2 - 0.7507516902763371) / (1 - 0.7507516902763371) *100:.2f}% higher than the initial R2 score.")

For K value: 1 :
Mean Squared Error: 41017162.95388656
R2 Score: 72.02567348436408 % 

For K value: 3 :
Mean Squared Error: 28815733.48058115
R2 Score: 80.34723322820803 % 

For K value: 5 :
Mean Squared Error: 25115846.431848507
R2 Score: 82.87061224612258 % 

For K value: 7 :
Mean Squared Error: 25403785.589184046
R2 Score: 82.67423337874462 % 

For K value: 9 :
Mean Squared Error: 25003569.566174276
R2 Score: 82.94718676942772 % 

For K value: 11 :
Mean Squared Error: 25436574.255218975
R2 Score: 82.65187101178364 % 

For K value: 13 :
Mean Squared Error: 25295807.078777503
R2 Score: 82.74787635864014 % 

For K value: 15 :
Mean Squared Error: 25778605.902184855
R2 Score: 82.41860024701472 % 

For K value: 17 :
Mean Squared Error: 26100796.060380265
R2 Score: 82.19886167817182 % 

For K value: 19 :
Mean Squared Error: 26422274.919051893
R2 Score: 81.97960822637984 % 

For K value: 21 :
Mean Squared Error: 25966969.334111884
R2 Score: 82.29013353286736 % 

For K value: 23 :
Mean Squar

Without train test model

In [54]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler


df = pd.read_csv("/content/insurance.csv")


le = LabelEncoder()
df['sex'] = label_encoder.fit_transform(df['sex'])
df['smoker'] = label_encoder.fit_transform(df['smoker'])
df['region'] = label_encoder.fit_transform(df['region'])

Xdf = df[['age', 'sex', 'bmi', 'children', 'smoker', 'region']]
y = df['expenses']

scaler = StandardScaler()
X = scaler.fit_transform(Xdf)

k_values = [i for i in range(1, 50) if i % 2 != 0]
best_mse = float('inf')
best_r2=0
best_k=0
for k in k_values:
    knn = KNeighborsRegressor(n_neighbors=k)
    knn.fit(X, y)

    y_pred = knn.predict(X)

    print(f"For K value: {k} :")

    mse = mean_squared_error(y, y_pred)
    r2 = r2_score(y, y_pred)

    print(f"Mean Squared Error: {mse}")
    print(f"R2 Score: {r2*100} % \n")
    if mse < best_mse:
      best_mse = mse
      best_r2 = r2
      best_k=k

print("\nFinal Results:")
print(f"For K = {best_k}, KNN Regression achieved an MSE of {best_mse:.2f}, which is {(36525536.8689183 - best_mse) / 36525536.8689183 * 100:.2f}% lower than the MSE by Linear Regression.")
print(f"It also achieved an R2 score of {best_r2:.2f}, which is {(best_r2 - 0.7507516902763371) / (1 - 0.7507516902763371) *100:.2f}% higher than the initial R2 score.")


For K value: 1 :
Mean Squared Error: 390637.46621285495
R2 Score: 99.73343108270328 % 

For K value: 3 :
Mean Squared Error: 14127732.214465747
R2 Score: 90.35931111068591 % 

For K value: 5 :
Mean Squared Error: 17617049.687186465
R2 Score: 87.97821953279602 % 

For K value: 7 :
Mean Squared Error: 19597090.041675683
R2 Score: 86.62705058677281 % 

For K value: 9 :
Mean Squared Error: 20789844.652259592
R2 Score: 85.81312122094293 % 

For K value: 11 :
Mean Squared Error: 21546780.843279675
R2 Score: 85.29659201328886 % 

For K value: 13 :
Mean Squared Error: 22363555.956899345
R2 Score: 84.73922904495068 % 

For K value: 15 :
Mean Squared Error: 23090032.26668485
R2 Score: 84.24348549731127 % 

For K value: 17 :
Mean Squared Error: 23557670.722093374
R2 Score: 83.92437151689077 % 

For K value: 19 :
Mean Squared Error: 24045957.434196535
R2 Score: 83.5911672765561 % 

For K value: 21 :
Mean Squared Error: 24666918.909555018
R2 Score: 83.16742648749648 % 

For K value: 23 :
Mean Squar