<a href="https://colab.research.google.com/github/RakeshRegala/ML-Workshop/blob/main/Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment - IV

**Load and Explore Dataset**


In [None]:
import pandas as pd

# Load the 50-startups dataset from the Colab working directory.
df = pd.read_csv('/content/50_Startups.csv')

# Emit three reports, one after another:
#   1. the first rows of the frame,
#   2. the number of missing values in each column,
#   3. descriptive statistics of the frame.
print(
    df.head(),
    df.isnull().sum(),
    df.describe(),
    sep='\n',
)


   R&D Spend  Administration  Marketing Spend       State     Profit
0  165349.20       136897.80        471784.10    New York  192261.83
1  162597.70       151377.59        443898.53  California  191792.06
2  153441.51       101145.55        407934.54     Florida  191050.39
3  144372.41       118671.85        383199.62    New York  182901.99
4  142107.34        91391.77        366168.42     Florida  166187.94
R&D Spend          0
Administration     0
Marketing Spend    0
State              0
Profit             0
dtype: int64
           R&D Spend  Administration  Marketing Spend         Profit
count      50.000000       50.000000        50.000000      50.000000
mean    73721.615600   121344.639600    211025.097800  112012.639200
std     45902.256482    28017.802755    122290.310726   40306.180338
min         0.000000    51283.140000         0.000000   14681.400000
25%     39936.370000   103730.875000    129300.132500   90138.902500
50%     73051.080000   122699.795000    212716.240000 

**1) Build Multiple Linear Regression Model**


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# One-hot encode the categorical 'State' column into a NEW frame.
# The loaded `df` is deliberately left untouched: overwriting it made this cell
# fail with a KeyError on a second run, because 'State' was already gone.
# drop_first=True keeps k-1 dummy columns for the k states.
df_encoded = pd.get_dummies(df, columns=['State'], drop_first=True)

# Define features (X) and target (y)
X = df_encoded.drop('Profit', axis=1)
y = df_encoded['Profit']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical across runs so the three models are compared on the same rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build and train the model
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Predict the target for the held-out rows
y_pred = lr_model.predict(X_test)

# Evaluate the model on the held-out rows
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of MSE

print("Linear Regression Results")
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared Score (R2):", r2)


Linear Regression Results
Mean Absolute Error (MAE): 6961.477813252376
Mean Squared Error (MSE): 82010363.04430099
Root Mean Squared Error (RMSE): 9055.957323458464
R-squared Score (R2): 0.8987266414328637


**2) L1 Regularization (Lasso Regression)**


In [None]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# Fit a Lasso (L1-regularized) model on the training split.
# alpha sets how strong the regularization is; tune it as needed.
lasso_model = Lasso(alpha=1.0)
lasso_model.fit(X_train, y_train)

# Predictions for the held-out rows
y_lasso_pred = lasso_model.predict(X_test)

# Error metrics on the test split, computed in the order they are reported
mae = mean_absolute_error(y_test, y_lasso_pred)
mse = mean_squared_error(y_test, y_lasso_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_lasso_pred)

# Report: header line followed by one "label value" line per metric
print("Lasso Regression Results")
for label, value in (
    ("Mean Absolute Error (MAE):", mae),
    ("Mean Squared Error (MSE):", mse),
    ("Root Mean Squared Error (RMSE):", rmse),
    ("R-squared Score (R2):", r2),
):
    print(label, value)


Lasso Regression Results
Mean Absolute Error (MAE): 6961.5746884671735
Mean Squared Error (MSE): 82004202.15414938
Root Mean Squared Error (RMSE): 9055.617160312675
R-squared Score (R2): 0.8987342494230525


**3) L2 Regularization (Ridge Regression)**


In [None]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# Fit a Ridge (L2-regularized) model on the training split.
# alpha sets how strong the regularization is; tune it as needed.
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_train, y_train)

# Predictions for the held-out rows
y_ridge_pred = ridge_model.predict(X_test)

# Error metrics on the test split, computed in the order they are reported
mae = mean_absolute_error(y_test, y_ridge_pred)
mse = mean_squared_error(y_test, y_ridge_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_ridge_pred)

# Report: header line followed by one "label value" line per metric
print("L2 Regression Results")
for label, value in (
    ("Mean Absolute Error (MAE):", mae),
    ("Mean Squared Error (MSE):", mse),
    ("Root Mean Squared Error (RMSE):", rmse),
    ("R-squared Score (R2):", r2),
):
    print(label, value)


L2 Regression Results
Mean Absolute Error (MAE): 6963.340034795974
Mean Squared Error (MSE): 81887773.66036233
Root Mean Squared Error (RMSE): 9049.186353499541
R-squared Score (R2): 0.8988780252113923


**Analyze and Compare Results**


In [None]:
# Test-set predictions of each fitted model, keyed by the name shown in the table.
# Insertion order fixes the row order of the comparison table.
predictions_by_model = {
    "Linear Regression": y_pred,
    "Lasso Regression": y_lasso_pred,
    "Ridge Regression": y_ridge_pred,
}

# One column per metric, one row per model; every metric is evaluated
# against the same held-out targets (y_test).
results = {
    "Model": list(predictions_by_model),
    "Mean Absolute Error": [
        mean_absolute_error(y_test, preds) for preds in predictions_by_model.values()
    ],
    "Mean Squared Error": [
        mean_squared_error(y_test, preds) for preds in predictions_by_model.values()
    ],
    "Root Mean Squared Error": [
        np.sqrt(mean_squared_error(y_test, preds)) for preds in predictions_by_model.values()
    ],
    "R-squared Score": [
        r2_score(y_test, preds) for preds in predictions_by_model.values()
    ],
}

results_df = pd.DataFrame(results)
print(results_df)


               Model  Mean Absolute Error  Mean Squared Error  \
0  Linear Regression          6961.477813        8.201036e+07   
1   Lasso Regression          6961.574688        8.200420e+07   
2   Ridge Regression          6963.340035        8.188777e+07   

   Root Mean Squared Error  R-squared Score  
0              9055.957323         0.898727  
1              9055.617160         0.898734  
2              9049.186353         0.898878  
