In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# ==========================================
# Step 1: Load Data
# ==========================================
# Option A: read the measurements from a CSV file (uncomment to use it).
# df = pd.read_csv('weight-height.csv')

# Option B: small built-in sample, for when no CSV file is available.
# Each Height entry is paired by position with the Weight entry at the
# same index.
data = dict(
    Height=[147, 150, 153, 158, 163, 165, 168, 170, 173, 175, 178, 180, 183],
    Weight=[52, 53, 54, 56, 59, 61, 63, 64, 66, 68, 70, 72, 74],
)
df = pd.DataFrame(data)

# ==========================================
# Step 2: Preprocessing
# ==========================================
# sklearn expects the features as a 2D table and the target as a 1D vector.
FEATURE_COLS = ['Height']
TARGET_COL = 'Weight'

X = df[FEATURE_COLS]  # list of column names -> DataFrame (2D)
y = df[TARGET_COL]    # single column name   -> Series (1D)

# ==========================================
# Step 3: Split Data (Train & Test)
# ==========================================
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# ==========================================
# Step 4: Train the Model
# ==========================================
# fit() returns the estimator itself, so construction and training can be
# chained into a single expression.
model = LinearRegression().fit(X_train, y_train)

# ==========================================
# Step 5: Prediction
# ==========================================
# Predicted weights for the held-out test heights.
y_pred = model.predict(X_test)

# Predict weight for a specific height (e.g., 170 cm).
# The model was fitted on a DataFrame, so the query must carry the same
# column names. Passing a bare nested list such as [[170]] makes sklearn
# emit "X does not have valid feature names" on predict().
query_height_cm = 170
user_height = pd.DataFrame([[query_height_cm]], columns=X_train.columns)
predicted_weight = model.predict(user_height)
print(f"Predicted weight for {query_height_cm}cm: {predicted_weight[0]:.2f} kg")

# ==========================================
# Step 6: Evaluation
# ==========================================
# Fitted line parameters: weight = slope * height + intercept.
slope, intercept = model.coef_[0], model.intercept_
print("Coefficients (Slope/m):", slope)
print("Intercept (c):", intercept)

# R2 compares the test-set predictions against the true test weights.
r2 = r2_score(y_test, y_pred)
print("R2 Score:", r2)

# ==========================================
# Step 7: Visualization
# ==========================================
# X holds a single feature column; look its name up rather than hard-coding it.
height_col = X.columns[0]

# Plot every actual observation.
plt.scatter(X[height_col], y, color='blue', label='Actual Data')

# Draw the fitted line across the full height range, sorted ascending.
# Plotting only (X_test, y_pred) would restrict the line to the span of the
# few test points and trace it in arbitrary order.
line_x = X.sort_values(by=height_col)
line_y = model.predict(line_x)
plt.plot(line_x[height_col], line_y, color='red', linewidth=2, label='Regression Line')

plt.xlabel('Height')
plt.ylabel('Weight')
plt.title('Height vs Weight Prediction')
plt.legend()
plt.show()

ModuleNotFoundError: No module named 'matplotlib'