In [None]:
# Title 1: Optimizing a Linear Regression Model to Predict Medical Insurance Charges
# Objective: To build and optimize a Linear Regression model that predicts a person's medical
# insurance charges based on demographic and health attributes, ensuring:
#   - Maximum R² score (better prediction accuracy)
#   - Minimum Mean Squared Error (MSE) (lower prediction error)
# Dataset: File: insurance.csv 
# The goal is to predict medical insurance charges using a Linear Regression model based on personal and health attributes like age, gender, BMI, smoking status, number of children, and region. 
# • The BMI values are first categorized into four groups: Underweight, Normal weight, Overweight, and Obese.
# • Different test sizes (0.1, 0.2, 0.3, 0.4, 0.5) and random states (1 to 49) are tried to find:
#   - the model with the highest R² score (better prediction accuracy)
#   - the model with the lowest Mean Squared Error (MSE) (lower prediction error)
# • After finding the best models, we predict the insurance charges for a new individual with specific given features (age 34, male, BMI 26, 3 children, smoker, southwest region).

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error , r2_score , mean_absolute_error

# Load the insurance data and one-hot encode its categorical (object/category
# dtype) columns in a single step; drop_first=True drops the first level of
# each encoded column.
# NOTE(review): the header comment of this cell says BMI is categorized into four
# groups before modelling; no such step is performed here — confirm whether it
# is still intended.
df = pd.get_dummies(pd.read_csv("insurance.csv"), drop_first=True)

# Features are every column except the target; the target is "charges".
x = df.drop(columns="charges")
y = df["charges"]

# Hold-out fractions to try; each one is combined with random states 1..49.
T = [0.1, 0.2, 0.3, 0.4, 0.5]
R2_score = []
MSE = []
# (test_size, random_state) of every run, index-aligned with MSE and R2_score,
# so the best scores can be traced back to the split that produced them.
split_params = []

for test_size in T:
    for random_state in range(1, 50):
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=test_size, random_state=random_state
        )
        lr = LinearRegression()
        lr.fit(x_train, y_train)
        y_pred = lr.predict(x_test)

        # Score this split on its held-out part and record both metrics.
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        MSE.append(mse)
        R2_score.append(r2)
        split_params.append((test_size, random_state))

print("MSE :- ", MSE, "\n")
print("R2 Score :- ", R2_score, "\n")
print(len(MSE))
print(len(R2_score))

min1 = min(MSE)
max1 = max(R2_score)
print("Minimum MSE :- ", min1)
print("Maximum R2_Score :- ", max1)

# Report which split produced each optimum; without this the best scores cannot
# be reproduced. list.index returns the first run that achieved the value.
best_mse_test_size, best_mse_random_state = split_params[MSE.index(min1)]
best_r2_test_size, best_r2_random_state = split_params[R2_score.index(max1)]
print("Minimum MSE at test_size =", best_mse_test_size,
      ", random_state =", best_mse_random_state)
print("Maximum R2_Score at test_size =", best_r2_test_size,
      ", random_state =", best_r2_random_state)

In [None]:
# Title 3
# Problem definition: The goal is to predict hotel booking status using the K-Nearest Neighbors (KNN)
# classification algorithm.
#   - The dataset is preprocessed by one-hot encoding the categorical features.
#   - The data is split into training and testing sets (80%-20%).
#   - The model is optimized by trying different values of 'k' (1 to 20) and selecting the one
#     that gives the highest accuracy.
#   - Finally, the best KNN model is trained and evaluated to predict booking status effectively.
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

# Load the hotel reservations, drop the Booking_ID column and one-hot encode
# the categorical columns (drop_first=True drops the first level of each).
df = pd.read_csv("Hotel Reservations.csv")
df = df.drop("Booking_ID", axis=1)
df = pd.get_dummies(df, drop_first=True)

# Target is the encoded 'booking_status_Not_Canceled' column; everything else
# is used as a feature.
y = df[['booking_status_Not_Canceled']]
x = df.drop('booking_status_Not_Canceled', axis=1)
print(x.shape)
print(y.shape)

# The split uses a fixed random_state, so it is identical for every k:
# compute it once instead of once per loop iteration.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# y is a single-column DataFrame; flatten the training target to 1-D so that
# fit() does not emit a DataConversionWarning on every iteration.
y_train_1d = y_train.values.ravel()

# acc[k - 1] holds the test accuracy of the classifier with n_neighbors=k.
acc = []
for k in range(1, 21):
    model = KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train_1d)
    y_pred = model.predict(x_test)
    cm = confusion_matrix(y_test, y_pred)
    # For a 2x2 confusion matrix, ravel() yields tn, fp, fn, tp in that order.
    tn, fp, fn, tp = cm.ravel()
    ac = (tp + tn) / (tp + tn + fn + fp)
    acc.append(ac)
print("Acc :- ", acc, "\n")


In [None]:
# Best accuracy over the k values tried. It is stored under its own name:
# assigning to `max` would shadow the built-in, so any later max(...) call
# (including a re-run of this cell) would raise TypeError.
max_acc = max(acc)
print("max :- ", max_acc)
# Position of the first occurrence of the best accuracy in acc (0-based).
ind = acc.index(max_acc)
print(ind)
# acc is filled for k = 1, 2, ... in order, so the best k is the index plus one.
best_k = ind + 1
print("best k :- ", best_k)