In [None]:
# Step 1: Data Exploration

# a) Build the dataset and take a first look at it.
import pandas as pd

# Sample data: twenty students, listed as the even-hour group followed by
# the odd-hour group. Every score in this toy set equals 5 * hours + 20,
# so the relationship between the two columns is exactly linear.
study_hours = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20,
               3, 5, 7, 9, 11, 13, 15, 17, 19, 21]
data = {
    'Study Hours': study_hours,
    'Exam Scores': [5 * hours + 20 for hours in study_hours],
}

df = pd.DataFrame(data)

# Show the first five rows to confirm the column layout.
print(df.head(5))

# b) Separate the predictor from the response.
# `features` stays two-dimensional (a one-column DataFrame);
# `target` is a one-dimensional Series.
features = df.loc[:, ['Study Hours']]
target = df.loc[:, 'Exam Scores']

# c) Count missing entries per column.
missing_per_column = df.isna().sum()
print(missing_per_column)

# Both counts are zero for this dataset, so nothing needs to be imputed or dropped.

# d) Plot exam score against hours studied.
import matplotlib.pyplot as plt

# Draw the points through pyplot, then label the axes it drew on.
plt.scatter(df['Study Hours'], df['Exam Scores'])
current_axes = plt.gca()
current_axes.set_title('Study Hours vs Exam Scores')
current_axes.set_xlabel('Study Hours')
current_axes.set_ylabel('Exam Scores')
plt.show()

# Step 2: Data Preprocessing

# a) Hold out 20% of the rows as a test set; the fixed random_state makes
#    the split repeatable from run to run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# b) Standardize the independent variable.
from sklearn.preprocessing import StandardScaler

# The scaler learns its statistics from the training split only; the same
# fitted transform is then applied to both splits so that no information
# from the test rows influences the scaling.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 3: Linear Regression Model

# a) Fit ordinary least squares on the scaled training split.
from sklearn.linear_model import LinearRegression

model = LinearRegression().fit(X_train_scaled, y_train)

# b) Score the fitted model on the held-out split.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

y_pred = model.predict(X_test_scaled)

# Each metric takes (true values, predicted values) in that order.
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'R-squared: {r2}')

# c) Inspect the fitted parameters.
# NOTE: the model was fit on X_train_scaled, so the coefficient is expressed
# per unit of the *scaled* feature rather than per raw study hour, and the
# intercept is the prediction where the scaled feature equals zero.
coefficients, intercept = model.coef_, model.intercept_

print(f'Coefficients: {coefficients}')
print(f'Intercept: {intercept}')

# Step 4: Model Improvement

# a) This simple example adds no engineered features.

# b) Fit the model again on the (unchanged) scaled training split.
# NOTE(review): the inputs here are the same X_train_scaled / y_train names
# used above, so the scores printed below are expected to match the earlier
# evaluation — confirm if this step is later extended with new features.
model.fit(X_train_scaled, y_train)

# c) Re-score on the held-out split.
y_pred_updated = model.predict(X_test_scaled)

mae_updated, mse_updated, r2_updated = (
    metric(y_test, y_pred_updated)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print(f'Improved Model - MAE: {mae_updated}')
print(f'Improved Model - MSE: {mse_updated}')
print(f'Improved Model - R-squared: {r2_updated}')
