# 1. Batch Prediction Analyst (Regression)

In [None]:
!pip install pandas scikit-learn matplotlib

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt

In [None]:
# Read the housing CSV into a DataFrame and preview the first five rows
DATA_PATH = 'data/housing_data.csv'
df = pd.read_csv(DATA_PATH)
df.head(5)

In [None]:
# Predictor columns and the column the model should learn to predict
features = ['SquareFootage', 'Bedrooms', 'Bathrooms', 'Age']
target = 'Price'

# Pull the feature matrix (X) and target vector (y) out of the loaded frame
X, y = df[features], df[target]

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a linear regression on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X_train, y_train)
print("Model trained successfully.")

In [None]:
# Predict on the held-out split and measure the fit with R-squared
y_pred = model.predict(X_test)
score = r2_score(y_true=y_test, y_pred=y_pred)

# Report the learned coefficients and the test-set R-squared, one per line
summary_lines = (
    f"Model Coefficients: {model.coef_}",
    f"R-squared Score: {score:.4f}",
)
print(*summary_lines, sep="\n")

In [None]:
# Scatter predicted prices against actual prices on a single 10x6 figure
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred)

# Dashed red y = x reference line spanning the range of actual prices;
# points lying on it are exact predictions
price_lo, price_hi = y_test.min(), y_test.max()
ax.plot([price_lo, price_hi], [price_lo, price_hi], 'r--', lw=2)

ax.set(
    xlabel='Actual Price',
    ylabel='Predicted Price',
    title='Actual vs. Predicted Housing Prices',
)
plt.show()