In [6]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score


In [7]:
# Load the comma-separated insurance dataset (path is relative to the notebook's working directory).
df = pd.read_csv("insurance.csv", sep=",")

In [8]:
# Show the freshly loaded frame as the cell output to eyeball columns and values.
df

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
1333,50,male,30.970,3,no,northwest,10600.54830
1334,18,female,31.920,0,no,northeast,2205.98080
1335,18,female,36.850,0,no,southeast,1629.83350
1336,21,female,25.800,0,no,southwest,2007.94500


In [9]:
# Count missing values per column; all zeros means no imputation is needed.
print(df.isnull().sum())

age         0
sex         0
bmi         0
children    0
smoker      0
region      0
charges     0
dtype: int64


In [10]:
# Remove the `children` and `region` columns; they are not used as features below.
# NOTE: this rebinds `df`, so re-running the cell on the already-trimmed frame raises KeyError.
df = df.drop(columns=["children", "region"])

In [11]:
# Show the frame again to confirm `children` and `region` are gone.
df

Unnamed: 0,age,sex,bmi,smoker,charges
0,19,female,27.900,yes,16884.92400
1,18,male,33.770,no,1725.55230
2,28,male,33.000,no,4449.46200
3,33,male,22.705,no,21984.47061
4,32,male,28.880,no,3866.85520
...,...,...,...,...,...
1333,50,male,30.970,no,10600.54830
1334,18,female,31.920,no,2205.98080
1335,18,female,36.850,no,1629.83350
1336,21,female,25.800,no,2007.94500


In [12]:
# Turn the textual `sex` column into integer codes.
# LabelEncoder numbers classes in sorted order, so here female -> 0 and male -> 1.
label_encoder = LabelEncoder()
label_encoder.fit(df["sex"])
df["sex"] = label_encoder.transform(df["sex"])


In [13]:
# List the distinct raw values of `smoker` (in order of first appearance) before encoding.
pd.unique(df["smoker"])

array(['yes', 'no'], dtype=object)

In [14]:

# Class balance of `smoker` as fractions of all rows (normalize=True), most frequent first.
print(df["smoker"].value_counts(normalize=True))


smoker
no     0.795217
yes    0.204783
Name: proportion, dtype: float64


In [15]:
# Turn the textual `smoker` column into integer codes (sorted class order: no -> 0, yes -> 1).
# This refits the shared `label_encoder`, so afterwards it holds the smoker
# classes and no longer the `sex` classes it was fitted on earlier.
label_encoder.fit(df["smoker"])
df["smoker"] = label_encoder.transform(df["smoker"])

In [16]:
# Show the frame again to confirm `sex` and `smoker` now hold integer codes.
df

Unnamed: 0,age,sex,bmi,smoker,charges
0,19,0,27.900,1,16884.92400
1,18,1,33.770,0,1725.55230
2,28,1,33.000,0,4449.46200
3,33,1,22.705,0,21984.47061
4,32,1,28.880,0,3866.85520
...,...,...,...,...,...
1333,50,1,30.970,0,10600.54830
1334,18,0,31.920,0,2205.98080
1335,18,0,36.850,0,1629.83350
1336,21,0,25.800,0,2007.94500


In [25]:


# Feature matrix: every remaining column except the target.
# Target vector: the raw `charges` values as a NumPy array.
X = df.drop(columns=["charges"])
y = df["charges"].values

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=11
)

# Fit an ordinary least-squares linear regression on the training rows.
model = LinearRegression()
model.fit(X_train, y_train)

# Score the held-out rows. These are regression error metrics (MSE and R²),
# not a classification "accuracy".
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")

# Predict charges for two hand-written cases.
# The model was fitted on a DataFrame, so it remembers the feature names;
# passing a bare array to predict() makes scikit-learn warn that the input has
# no valid feature names and silently relies on positional column order.
# Building the cases as a DataFrame with X's own columns keeps names and order
# explicit. Each row follows the column order of X.
new_cases = pd.DataFrame(
    [
        [23, 0, 78.07, 1],
        [23, 0, 28.06, 0],
    ],
    columns=X.columns,
)
predictions = model.predict(new_cases)
print(f"Predicted : {predictions}")



Mean Squared Error: 27848938.96
R² Score: 0.80
Predicted : [43485.66944552  3457.91922976]




In [21]:
def evaluate_split(state):
    """Fit a fresh LinearRegression on one 80/20 split and score it.

    Uses the notebook-level ``X`` and ``y``. Everything created here is local,
    so the notebook-level ``model``, ``X_train``/``X_test``,
    ``y_train``/``y_test``, ``y_pred``, ``mse`` and ``r2`` from the main
    training cell are not overwritten by the sweep.

    Parameters
    ----------
    state : int
        Value passed as ``random_state`` to ``train_test_split``.

    Returns
    -------
    tuple
        ``(state, mse, r2)`` measured on that split's held-out 20%.
    """
    X_tr, X_te, y_tr, y_te = train_test_split(
        X, y, test_size=0.2, random_state=state
    )
    split_model = LinearRegression()
    split_model.fit(X_tr, y_tr)
    y_hat = split_model.predict(X_te)
    return state, mean_squared_error(y_te, y_hat), r2_score(y_te, y_hat)


# Score the same model on 200 different train/test partitions (seeds 1..200).
results = [evaluate_split(state) for state in range(1, 201)]

# Order by lowest MSE first; ties are broken by highest R².
sorted_results = sorted(results, key=lambda x: (x[1], -x[2]))

# NOTE(review): the spread printed here shows how much the metrics depend on
# the split alone. Picking a random_state because it scores best on its own
# test set would bias the reported metric; prefer cross-validation to summarise.
for seed, split_mse, split_r2 in sorted_results:
    print(f"Random State: {seed} → MSE: {split_mse:.2f}  R²: {split_r2:.2f}")


Random State: 52 → MSE: 10794283.93  R²: 0.75
Random State: 144 → MSE: 11402417.07  R²: 0.78
Random State: 185 → MSE: 12201354.74  R²: 0.73
Random State: 178 → MSE: 12685921.84  R²: 0.73
Random State: 117 → MSE: 13331160.17  R²: 0.69
Random State: 191 → MSE: 13398468.40  R²: 0.67
Random State: 108 → MSE: 13436508.58  R²: 0.72
Random State: 66 → MSE: 14023889.94  R²: 0.70
Random State: 4 → MSE: 14243903.75  R²: 0.68
Random State: 61 → MSE: 14450515.89  R²: 0.70
Random State: 55 → MSE: 15286028.98  R²: 0.67
Random State: 110 → MSE: 15326136.32  R²: 0.64
Random State: 127 → MSE: 15616037.27  R²: 0.64
Random State: 150 → MSE: 15789802.25  R²: 0.68
Random State: 152 → MSE: 15847092.39  R²: 0.66
Random State: 193 → MSE: 15920989.53  R²: 0.65
Random State: 101 → MSE: 16058627.88  R²: 0.64
Random State: 175 → MSE: 16064668.11  R²: 0.66
Random State: 138 → MSE: 16086142.17  R²: 0.65
Random State: 130 → MSE: 16118921.06  R²: 0.59
Random State: 186 → MSE: 16164871.32  R²: 0.68
Random State: 71 → 