# Practical 3: Supervised Learning - Regression
## Predicting Image Brightness from RGB Features

### 1. Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import warnings
# Install a blanket "ignore" filter so warnings do not clutter cell output.
warnings.filterwarnings(action='ignore')

# Use seaborn's white-grid theme for the figures.
sns.set_style(style='whitegrid')
print("Libraries imported successfully!")

### 2. Load Preprocessed Data

In [None]:
# Load the preprocessed dataset written by Practical 2.
# The file is read directly instead of being probed for first, so this cell
# needs nothing beyond pandas and cannot continue without a valid `df`.
try:
    df = pd.read_csv('preprocessed_data.csv')
except FileNotFoundError as err:
    # Stop with an actionable message; continuing without `df` would only
    # surface as a confusing NameError on the next statement.
    raise FileNotFoundError(
        "Error: Please run Practical 2 first to generate preprocessed data"
    ) from err

print(f"Data loaded: {len(df)} samples")

# Last expression of the cell: shows the first rows as the cell output.
df.head()

### 3. Prepare Data for Regression

In [None]:
# Choose the model inputs (RGB channel means) and the column to predict.
target_col = 'brightness'
feature_cols = ['mean_red', 'mean_green', 'mean_blue']

# X holds one column per feature; y is the single target column.
y = df[target_col]
X = df[feature_cols]

# Report shapes and column names (the leading "\n" inserts the blank line).
print(f"Features shape: {X.shape}", f"Target shape: {y.shape}", sep="\n")
print(f"\nFeatures: {feature_cols}", f"Target: {target_col}", sep="\n")

### 4. Split Data into Training and Testing Sets

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

n_total = len(X)
n_train, n_test = len(X_train), len(X_test)

# Absolute sizes first, then each split as a share of the full dataset.
print(f"Training set size: {n_train} samples")
print(f"Testing set size: {n_test} samples")
print(f"\nTraining set: {n_train/n_total*100:.1f}%")
print(f"Testing set: {n_test/n_total*100:.1f}%")

### 5. Build Linear Regression Model

In [None]:
# Fit a linear regression on the training split (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

print("Linear Regression Model trained successfully!")
print("\nModel Coefficients:")

# Pair each feature name with its learned weight, in feature order.
coef_by_feature = dict(zip(feature_cols, model.coef_))
for feature, coef in coef_by_feature.items():
    print(f"  {feature}: {coef:.4f}")
print(f"\nIntercept: {model.intercept_:.4f}")

### 6. Make Predictions

In [None]:
# Run the fitted model over both splits.
y_test_pred = model.predict(X_test)
y_train_pred = model.predict(X_train)

print("Predictions made successfully!")
print("\nSample predictions (first 5):")

# Side-by-side view of the first five held-out rows.
head_actual = y_test.values[:5]
head_predicted = y_test_pred[:5]
comparison_df = pd.DataFrame({
    'Actual': head_actual,
    'Predicted': head_predicted,
    'Difference': head_actual - head_predicted,
})
print(comparison_df)

### 7. Evaluate Model Performance

In [None]:
# Score both splits with the same metrics: MSE, MAE and R² come from
# scikit-learn; RMSE is the square root of the MSE.
train_mse, train_mae, train_r2 = (
    metric(y_train, y_train_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)
train_rmse = np.sqrt(train_mse)

test_mse, test_mae, test_r2 = (
    metric(y_test, y_test_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)
test_rmse = np.sqrt(test_mse)

# Print one identically formatted section per split.
banner = "=" * 60
print(banner)
print("MODEL PERFORMANCE METRICS")
print(banner)
for heading, (mse, rmse, mae, r2) in (
    ("Training Set:", (train_mse, train_rmse, train_mae, train_r2)),
    ("Testing Set:", (test_mse, test_rmse, test_mae, test_r2)),
):
    print(f"\n{heading}")
    print(f"  Mean Squared Error (MSE): {mse:.4f}")
    print(f"  Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"  Mean Absolute Error (MAE): {mae:.4f}")
    print(f"  R² Score: {r2:.4f}")
print(banner)

### 8. Visualize Results - Actual vs Predicted

In [None]:
# Held-out predictions against the true values; a point on the dashed
# diagonal is an exact prediction.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_test_pred, alpha=0.6, edgecolors='k', s=80)

# The reference diagonal spans the observed range of the true values.
lo, hi = y_test.min(), y_test.max()
ax.plot([lo, hi], [lo, hi], 'r--', lw=2, label='Perfect Prediction')

label_font = {'fontsize': 12, 'fontweight': 'bold'}
ax.set_xlabel('Actual Brightness', **label_font)
ax.set_ylabel('Predicted Brightness', **label_font)
ax.set_title('Actual vs Predicted Brightness', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

### 9. Residual Analysis

In [None]:
# Residual = actual minus predicted, on the held-out split.
residuals = y_test - y_test_pred

# Two panels side by side: residuals vs. prediction, and their histogram.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
scatter_ax, hist_ax = axes
axis_font = {'fontsize': 12, 'fontweight': 'bold'}
title_font = {'fontsize': 14, 'fontweight': 'bold'}

# Left panel: residuals against predicted value, with a zero reference line.
scatter_ax.scatter(y_test_pred, residuals, alpha=0.6, edgecolors='k')
scatter_ax.axhline(y=0, color='r', linestyle='--', lw=2)
scatter_ax.set_xlabel('Predicted Brightness', **axis_font)
scatter_ax.set_ylabel('Residuals', **axis_font)
scatter_ax.set_title('Residual Plot', **title_font)
scatter_ax.grid(True, alpha=0.3)

# Right panel: distribution of the residuals, with zero marked.
hist_ax.hist(residuals, bins=30, edgecolor='black', alpha=0.7)
hist_ax.set_xlabel('Residuals', **axis_font)
hist_ax.set_ylabel('Frequency', **axis_font)
hist_ax.set_title('Residual Distribution', **title_font)
hist_ax.axvline(x=0, color='r', linestyle='--', lw=2)
hist_ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

### 10. Feature Importance Visualization

In [None]:
# Tabulate the fitted coefficients and order them from largest to smallest
# signed value (not by magnitude).
coefficients = pd.DataFrame({'Feature': feature_cols, 'Coefficient': model.coef_})
coefficients = coefficients.sort_values(by='Coefficient', ascending=False)

plt.figure(figsize=(10, 6))
# NOTE(review): seaborn 0.13+ deprecates `palette` without `hue`; the warning
# is hidden if warnings are globally ignored. Confirm the installed version
# before switching to `hue='Feature', legend=False`.
sns.barplot(x='Feature', y='Coefficient', data=coefficients, palette='viridis')

title_font = {'fontsize': 14, 'fontweight': 'bold'}
axis_font = {'fontsize': 12, 'fontweight': 'bold'}
plt.title('Feature Importance (Coefficients)', **title_font)
plt.xlabel('Features', **axis_font)
plt.ylabel('Coefficient Value', **axis_font)
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.show()

### 11. Prediction on New Data

In [None]:
# Three hand-written samples, one row each, in (red, green, blue) column order.
# NOTE(review): the negative values suggest the features were standardised
# upstream — confirm against Practical 2.
new_data = pd.DataFrame(
    [
        [0.5, 0.8, 0.3],
        [-0.3, -0.5, -0.8],
        [1.2, 1.0, 0.9],
    ],
    columns=['mean_red', 'mean_green', 'mean_blue'],
)

# Predict before attaching the result column, so the model is given only the
# three feature columns.
new_predictions = model.predict(new_data)
new_data['Predicted_Brightness'] = new_predictions

print("Predictions on new data:")
print(new_data)

### 12. Save Model Results

In [None]:
# Write the per-sample test-set results (actual, predicted, residual) to CSV.
results_path = 'regression_results.csv'
results_df = pd.DataFrame({
    'Actual': y_test.values,
    'Predicted': y_test_pred,
    'Residual': residuals,
})
results_df.to_csv(results_path, index=False)
print(f"Results saved to '{results_path}'")

# Write the train/test metric table to CSV, one row per metric.
metrics_path = 'model_metrics.csv'
metric_rows = {
    'MSE': (train_mse, test_mse),
    'RMSE': (train_rmse, test_rmse),
    'MAE': (train_mae, test_mae),
    'R2_Score': (train_r2, test_r2),
}
metrics_df = pd.DataFrame({
    'Metric': list(metric_rows),
    'Training': [train_value for train_value, _ in metric_rows.values()],
    'Testing': [test_value for _, test_value in metric_rows.values()],
})
metrics_df.to_csv(metrics_path, index=False)
print(f"Metrics saved to '{metrics_path}'")

### 13. Summary Report

In [None]:
# Build the fitted equation: intercept first, then one "+ (coef × feature)"
# term per feature, in feature order.
terms = "".join(
    f" + ({coef:.4f} × {feature})"
    for feature, coef in zip(feature_cols, model.coef_)
)
equation = f"Brightness = {model.intercept_:.4f}{terms}"

# Assemble the whole report, then print it one line per entry.
rule = "=" * 70
report_lines = [
    rule,
    "LINEAR REGRESSION MODEL SUMMARY",
    rule,
    "\nModel Type: Linear Regression",
    f"Target Variable: {target_col}",
    f"Features Used: {', '.join(feature_cols)}",
    "\nDataset Split:",
    f"  Training samples: {len(X_train)}",
    f"  Testing samples: {len(X_test)}",
    "\nModel Performance (Test Set):",
    f"  R² Score: {test_r2:.4f} ({test_r2*100:.2f}% variance explained)",
    f"  RMSE: {test_rmse:.4f}",
    f"  MAE: {test_mae:.4f}",
    "\nModel Equation:",
    f"  {equation}",
    rule,
]
print(*report_lines, sep="\n")