<a href="https://colab.research.google.com/github/PhaniChandraSekhar/AIML_Practice/blob/main/Linear_Regression_Logistic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Linear regression demo on a synthetic housing dataset
# (replace the generated frame with your Kaggle data when available).

# Seed NumPy's global generator so that Restart & Run All reproduces the
# same data, and therefore the same metrics, on every execution.
np.random.seed(42)
data = {'size_sqft': np.random.randint(500, 3000, 100)}
df = pd.DataFrame(data)

# Price is a linear function of size (50 per sqft + 50000 offset) plus
# Gaussian noise with a standard deviation of 10000.
df['price_usd'] = 50 * df['size_sqft'] + np.random.randn(len(df)) * 10000 + 50000


# Prepare data: a single-feature design matrix and the target column
X = df[['size_sqft']]
y = df['price_usd']

# Split data into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Implement and train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Model evaluation and prediction on the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Linear Regression Model Evaluation:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared (R2): {r2:.2f}")

# Example prediction.
# The model was fitted on a DataFrame, so the sample is passed as a DataFrame
# with the same column name; a bare nested list makes scikit-learn warn that
# X does not have valid feature names.
new_house_size = 1800
new_house = pd.DataFrame([[new_house_size]], columns=X.columns)
predicted_price = model.predict(new_house)
print(f"\nPredicted price for a {new_house_size} sqft house: ${predicted_price[0]:.2f}")

Linear Regression Model Evaluation:
Mean Squared Error (MSE): 151293781.06
R-squared (R2): 0.88

Predicted price for a 1800 sqft house: $138898.62




In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import numpy as np

# Logistic regression demo on a synthetic ad-click dataset
# (replace the generated frame with your Kaggle data when available).
np.random.seed(42)  # fixed seed: the generated data is identical on every run
data = {'Age': np.random.randint(18, 65, 200),
        'EstimatedSalary': np.random.randint(20000, 150000, 200)}
df = pd.DataFrame(data)
print(df.head())

# The label is derived from the two features plus Gaussian noise, so it has to
# be computed *after* the DataFrame exists. The comparison yields booleans.
df['ClickedAd'] = (df['Age'] * 0.5 + df['EstimatedSalary'] * 0.0001 + np.random.randn(len(df)) * 5 - 30) > 0

# Convert the boolean results to integers (True becomes 1, False becomes 0)
df['ClickedAd'] = df['ClickedAd'].astype(int)

# Prepare data
X = df[['Age', 'EstimatedSalary']]
y = df['ClickedAd']

# Split data into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Implement and train the Logistic Regression model.
# NOTE(review): Age and EstimatedSalary are on very different scales; if the
# solver reports a ConvergenceWarning, consider standardising the features
# first - confirm against the actual run.
model = LogisticRegression()
model.fit(X_train, y_train)

# Model evaluation and prediction on the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Logistic Regression Model Evaluation:")
print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:\n", cm)
print("Classification Report:\n", report)

# Example prediction.
# The model was fitted on a DataFrame, so the sample is wrapped in a DataFrame
# carrying the same column names (in the same order); a bare nested list makes
# scikit-learn warn that X does not have valid feature names.
new_customer_data = [[35, 70000]]
new_customer = pd.DataFrame(new_customer_data, columns=X.columns)
predicted_click = model.predict(new_customer)
print(f"\nPredicted click for a 35-year-old with $70k salary: {predicted_click[0]}")

   Age  EstimatedSalary
0   56            72733
1   46            85318
2   32           129953
3   60           109474
4   25            43664
Logistic Regression Model Evaluation:
Accuracy: 0.88
Confusion Matrix:
 [[19  2]
 [ 3 16]]
Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.90      0.88        21
           1       0.89      0.84      0.86        19

    accuracy                           0.88        40
   macro avg       0.88      0.87      0.87        40
weighted avg       0.88      0.88      0.87        40


Predicted click for a 35-year-old with $70k salary: 0




In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error
import numpy as np
import matplotlib.pyplot as plt

# Underfitting vs. overfitting demo: fit a straight line and a degree-15
# polynomial to noisy samples of sin(x), then compare train/test error.

# Create a sample dataset with non-linear relationship
np.random.seed(0)
X = np.sort(5 * np.random.rand(80, 1), axis=0)
y = np.sin(X).ravel() + np.random.normal(0, 0.1, X.shape[0])

# Split data (train_test_split shuffles, so X_train / X_test are NOT sorted)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Underfitting (Linear Model) ---
print("Underfitting Example (Linear Model):")
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred_linear_train = linear_model.predict(X_train)
y_pred_linear_test = linear_model.predict(X_test)

mse_linear_train = mean_squared_error(y_train, y_pred_linear_train)
mse_linear_test = mean_squared_error(y_test, y_pred_linear_test)

print(f"Train MSE (Linear): {mse_linear_train:.4f}")
print(f"Test MSE (Linear): {mse_linear_test:.4f}")

# --- Overfitting (High-degree Polynomial) ---
print("\nOverfitting Example (High-degree Polynomial):")
polynomial_features = PolynomialFeatures(degree=15) # High degree
# Fit the feature expansion on the training split only, then reuse it for test.
X_train_poly = polynomial_features.fit_transform(X_train)
X_test_poly = polynomial_features.transform(X_test)

poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)
y_pred_poly_train = poly_model.predict(X_train_poly)
y_pred_poly_test = poly_model.predict(X_test_poly)

mse_poly_train = mean_squared_error(y_train, y_pred_poly_train)
mse_poly_test = mean_squared_error(y_test, y_pred_poly_test)

print(f"Train MSE (Polynomial): {mse_poly_train:.4f}")
print(f"Test MSE (Polynomial): {mse_poly_test:.4f}")

# Plotting the results to visualize
plt.figure(figsize=(10, 6))
plt.scatter(X, y, s=20, label="True values")

# plt.plot joins points in the order given. The splits are shuffled, so draw
# them in ascending x order; otherwise the polyline retraces itself and the
# dashed test line has its dash pattern smeared by the overlapping segments.
train_order = np.argsort(X_train.ravel())
test_order = np.argsort(X_test.ravel())
plt.plot(X_train[train_order], y_pred_linear_train[train_order],
         label="Linear (Underfitting) - Train")
plt.plot(X_test[test_order], y_pred_linear_test[test_order],
         label="Linear (Underfitting) - Test", linestyle='--')

# Evaluate the polynomial model on a dense, evenly spaced grid so the curve
# between the sample points is visible.
X_plot = np.linspace(0, 5, 100).reshape(-1, 1)
X_plot_poly = polynomial_features.transform(X_plot)
y_plot_poly = poly_model.predict(X_plot_poly)
plt.plot(X_plot, y_plot_poly, label="Polynomial (Overfitting)")

plt.xlabel("X")
plt.ylabel("y")
plt.title("Underfitting vs. Overfitting")
plt.legend()
plt.show()