<a href="https://colab.research.google.com/github/Lokendra-parmar/python-programming-questions/blob/main/Welcome_to_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# GA_5_MLP

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Upload the dataset into the Colab runtime. In the recorded run the file was
# saved as GA_5_dataset.csv, which is the name the following cell reads.
# `uploaded` keeps whatever files.upload() returns.
from google.colab import files
uploaded = files.upload()

Saving GA_5_dataset.csv to GA_5_dataset.csv


In [6]:
# Read the uploaded CSV; the name must match the file saved by the upload cell.
df = pd.read_csv('GA_5_dataset.csv')

# Credit_Limit is the regression target; every other column is used as a feature.
y = df['Credit_Limit']
X = df.drop(columns=['Credit_Limit'])

# Hold out 30% of the rows for testing. The fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Sanity check: report the shape of each split.
for label, part in (
    ("Shape of X_train:", X_train),
    ("Shape of X_test :", X_test),
    ("Shape of y_train:", y_train),
    ("Shape of y_test :", y_test),
):
    print(label, part.shape)

Shape of X_train: (4200, 16)
Shape of X_test : (1800, 16)
Shape of y_train: (4200,)
Shape of y_test : (1800,)


In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Linear regression with the intercept term disabled, fitted on the training split.
# NOTE(review): the recorded test R² for this configuration is negative — confirm
# that fit_intercept=False is really what is wanted here.
model = LinearRegression(fit_intercept=False).fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("R² Score on Test Set:", r2)

R² Score on Test Set: -0.41121711792312987


In [8]:
import numpy as np

# Coefficients learned by the linear model fitted above.
coefficients = model.coef_

# Position of the coefficient with the largest magnitude (sign ignored).
index_max_coeff = np.abs(coefficients).argmax()

print("Index of feature with highest absolute coefficient value:", index_max_coeff)


Index of feature with highest absolute coefficient value: 5


In [9]:
from sklearn.linear_model import Ridge

# Ridge regression using the 'sag' solver with a fixed seed, fitted on the training split.
# NOTE(review): scikit-learn documents that 'sag' only converges quickly when features
# share a similar scale; no scaling step is visible before this cell — confirm intent.
ridge_model = Ridge(solver='sag', tol=0.0005, random_state=42).fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred_ridge = ridge_model.predict(X_test)
r2_ridge = r2_score(y_true=y_test, y_pred=y_pred_ridge)
print("R² Score of Ridge Regression on Test Set:", r2_ridge)

R² Score of Ridge Regression on Test Set: 0.5031632306039973


In [10]:
# Show the bias term learned by the Ridge model fitted above.
ridge_intercept = ridge_model.intercept_
print("Intercept of Ridge Regression model:", ridge_intercept)


Intercept of Ridge Regression model: 8638.307615757858


In [11]:
from sklearn.linear_model import Lasso

# Lasso regression with regularisation strength alpha=100, fitted on the training split.
lasso_model = Lasso(alpha=100, random_state=42).fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred_lasso = lasso_model.predict(X_test)
r2_lasso = r2_score(y_true=y_test, y_pred=y_pred_lasso)
print("R² Score of Lasso Regression on Test Set:", r2_lasso)


R² Score of Lasso Regression on Test Set: 0.5013545795541585


In [12]:
import numpy as np

# Coefficients of the Lasso model fitted above.
lasso_coeffs = lasso_model.coef_

# A coefficient lies in [-1, 1] exactly when its magnitude is at most 1.
count_in_range = (np.abs(lasso_coeffs) <= 1).sum()

print("Number of coefficients in the range [-1, 1]:", count_in_range)


Number of coefficients in the range [-1, 1]: 9


In [13]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# k-nearest-neighbours regressor: 10 neighbours, Minkowski power p=1
# (the L1 / Manhattan metric in scikit-learn's parameterisation).
knn_model = KNeighborsRegressor(n_neighbors=10, p=1).fit(X_train, y_train)

# Predict the held-out rows.
y_pred_knn = knn_model.predict(X_test)

# RMSE is the square root of the mean squared error.
mse_knn = mean_squared_error(y_test, y_pred_knn)
rmse_knn = np.sqrt(mse_knn)
print("Root Mean Squared Error (RMSE) of KNN on Test Set:", rmse_knn)


Root Mean Squared Error (RMSE) of KNN on Test Set: 6707.055787083381


In [14]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Depth-limited regression tree: a node needs at least 6 samples to be split and
# every leaf must keep at least 6 samples; the seed fixes any tie-breaking.
tree_model = DecisionTreeRegressor(
    max_depth=10, min_samples_split=6, min_samples_leaf=6, random_state=42
).fit(X_train, y_train)

# Predict the held-out rows.
y_pred_tree = tree_model.predict(X_test)

# RMSE is the square root of the mean squared error.
mse_tree = mean_squared_error(y_test, y_pred_tree)
rmse_tree = np.sqrt(mse_tree)
print("Root Mean Squared Error (RMSE) of Decision Tree on Test Set:", rmse_tree)


Root Mean Squared Error (RMSE) of Decision Tree on Test Set: 6740.833851583081


In [15]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score

# Hyper-parameter candidates: 5 ensemble sizes x 4 learning rates = 20 combinations.
param_grid = {
    'n_estimators': [10, 50, 100, 200, 500],
    'learning_rate': [0.1, 0.5, 1, 2],
}

# Base estimator handed to the search; the seed keeps boosting reproducible.
ada = AdaBoostRegressor(random_state=42)

# Exhaustive search over the grid, scored by R² with cv=4, using all available cores.
# The search sees only the training split.
grid_search = GridSearchCV(
    estimator=ada, param_grid=param_grid, cv=4, scoring='r2', n_jobs=-1
).fit(X_train, y_train)

# Best configuration found (refit by GridSearchCV under its default settings),
# evaluated once on the held-out rows.
best_ada_model = grid_search.best_estimator_
y_pred_ada = best_ada_model.predict(X_test)
r2_ada = r2_score(y_true=y_test, y_pred=y_pred_ada)

# Report the winning parameters and the test-set score.
print("Best Parameters:", grid_search.best_params_)
print("R² Score of Best AdaBoost Model on Test Set:", r2_ada)


Best Parameters: {'learning_rate': 0.1, 'n_estimators': 10}
R² Score of Best AdaBoost Model on Test Set: 0.5400284992718735


In [16]:
# Pull the selected ensemble size out of the grid-search result and show it.
best_n_estimators = grid_search.best_params_['n_estimators']
print("Best n_estimators:", best_n_estimators)


Best n_estimators: 10
