<a href="https://colab.research.google.com/github/AMJAMAITHILI/ML_LAB-141/blob/main/INTERNAL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Import libraries
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Step 2: Load the California housing data, keeping only the first column
# (MedInc, median income) as the single predictor.
california = fetch_california_housing()
X = california.data[:, :1]  # slicing with :1 keeps X two-dimensional, shape (n_samples, 1)
y = california.target

# Step 3: Hold out 30% of the rows for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Step 4: Standardize the feature. The scaler learns its statistics from the
# training rows only and is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 5: Fit ordinary least squares on the standardized training feature
lr = LinearRegression().fit(X_train_scaled, y_train)

# Step 6: Predict on the held-out rows
y_pred = lr.predict(X_test_scaled)

# Step 7: Report the fitted line. Because the feature is standardized, the
# slope is expressed per standard deviation of median income.
print("Intercept (b0):", lr.intercept_)
print("Slope (b1):", lr.coef_[0])
print(f"\nInterpretation: For each 1 standard deviation increase in median income, the house price changes by {lr.coef_[0]:.2f} units.\n")

# Step 8: Error metrics on the test split (RMSE is derived from MSE)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)

print("Metrics on Test Data:")
for metric_label, metric_value in (
    ("Mean Squared Error (MSE):", mse),
    ("Mean Absolute Error (MAE):", mae),
    ("Root Mean Squared Error (RMSE):", rmse),
):
    print(metric_label, metric_value)


Intercept (b0): 2.0692396089424143
Slope (b1): 0.7965923043030702

Interpretation: For each 1 standard deviation increase in median income, the house price changes by 0.80 units.

Metrics on Test Data:
Mean Squared Error (MSE): 0.6917979868048499
Mean Absolute Error (MAE): 0.6231559293823171
Root Mean Squared Error (RMSE): 0.8317439430526982


In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# Load data
# NOTE(review): "/content/Salary.csv" is a Colab runtime path; the file must be
# uploaded to the session before this cell is run.
data = pd.read_csv("/content/Salary.csv")
X = data[['YearsExperience']]
y = data['Salary']

# Train model on every row (this cell does not hold out a test split)
model = LinearRegression()
model.fit(X, y)

# Sample years to check
sample_years = [1, 3, 5, 7, 10]

# The model was fitted on a DataFrame, so it remembers the column name.
# Querying it with a DataFrame that has the same columns avoids scikit-learn's
# "X does not have valid feature names" warning, and a single predict() call
# replaces one call per sample.
sample_X = pd.DataFrame(sample_years, columns=X.columns)
predicted_salaries = model.predict(sample_X)

print("Predicted Salaries for Sample Years of Experience:")
for year, predicted_salary in zip(sample_years, predicted_salaries):
    print(f"Years of Experience: {year}, Predicted Salary: {predicted_salary:.2f}")


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Pima Indians diabetes data: the CSV has no header row, so column names are supplied here.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = ['Pregnancies','Glucose','BloodPressure','SkinThickness','Insulin','BMI','DiabetesPedigree','Age','Outcome']
data = pd.read_csv(url, names=columns)

# Features are every column except the label; 20% of the rows are held out for testing.
X = data.drop('Outcome', axis=1)
y = data['Outcome']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# max_iter is raised to 1000 so the solver has room to converge on the unscaled features.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# With labels=[0, 1] the matrix is always 2x2, laid out as [[TN, FP], [FN, TP]]
# (rows = actual class, columns = predicted class). Without explicit labels the
# matrix shrinks to 1x1 when only one class shows up, and the four-way unpack
# further down would raise. Assumes Outcome is coded 0/1 - verify if the data source changes.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Confusion Matrix:\n", cm)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

# Recompute the same metrics by hand from the four confusion-matrix cells.
# ravel() flattens row by row, which gives the order TN, FP, FN, TP.
TN, FP, FN, TP = cm.ravel()
manual_accuracy = (TP + TN) / (TP + TN + FP + FN)
# Each ratio falls back to 0 when its denominator is 0 (e.g. no positive predictions).
manual_precision = TP / (TP + FP) if (TP + FP) != 0 else 0
manual_recall = TP / (TP + FN) if (TP + FN) != 0 else 0
manual_f1 = 2 * manual_precision * manual_recall / (manual_precision + manual_recall) if (manual_precision + manual_recall) != 0 else 0

print("\nManual Calculation:")
print("Accuracy:", manual_accuracy)
print("Precision:", manual_precision)
print("Recall:", manual_recall)
print("F1 Score:", manual_f1)



In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

diabetes = load_diabetes()
# Column index 2 (the third column, counting from 0) is the BMI feature.
# A single feature is used so the fit can be drawn on a 2-D plot.
X = diabetes.data[:, 2].reshape(-1, 1)
y = diabetes.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Polynomial transformation: expand x into the columns [1, x, x^2, ..., x^degree]
# so that a linear model can fit the curve y = b0 + b1*x + b2*x^2 (for degree 2).
degree = 2
poly_features = PolynomialFeatures(degree=degree)
# The expansion is fitted on the training rows and reused unchanged on the test rows.
X_train_poly = poly_features.fit_transform(X_train)
X_test_poly = poly_features.transform(X_test)

# NOTE(review): LinearRegression fits its own intercept by default, so the
# constant column produced above is redundant (harmless, but could be dropped
# with include_bias=False).
model = LinearRegression()
model.fit(X_train_poly, y_train)

y_pred = model.predict(X_test_poly)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# The header and the plot title are derived from `degree` so they stay correct if it is changed.
print(f"Polynomial Regression (degree {degree}):")
print("MAE:", mae)
print("MSE:", mse)
print("RMSE:", rmse)
print("R² Score:", r2)

# Visualization: actual vs. predicted targets over the raw (unexpanded) BMI values
plt.scatter(X_test, y_test, color="blue", label="Actual Data")
plt.scatter(X_test, y_pred, color="red", label="Predicted (Poly)")
plt.xlabel("BMI")
plt.ylabel("Diabetes Progression")
plt.title(f"Polynomial Regression (Degree {degree})")
plt.legend()
plt.show()

# Side-by-side table of the held-out targets and the model's predictions
results = pd.DataFrame({
    "Actual": y_test,
    "Predicted": y_pred
})

print("\nActual vs Predicted Values (sample):")
print(results.head(10))
