In [None]:
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report, accuracy_score
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
import joblib

In [None]:
# Read the income data set from a CSV file in the working directory.
csv_path = "canada_per_capita_income.csv"
df = pd.read_csv(csv_path)

# Show the first five rows as the cell's output.
df.head(n=5)

In [None]:
# Fit a linear regression of income on year.
# fit() returns the estimator itself, so the model is built and trained in one step.
features = df[['year']]
target = df['income']
model = linear_model.LinearRegression().fit(features, target)

# Report the fitted slope and intercept.
print("Coefficient:", model.coef_)
print("Intercept:", model.intercept_)

# Extrapolate to 2030; the query frame reuses the training column name.
query_2030 = pd.DataFrame({"year": [2030]})
prediction_2030 = model.predict(query_2030)
print("Predicted income for 2030:", prediction_2030[0])

In [None]:
# Persist the fitted regression model to disk.
model_path = 'model_joblib'
joblib.dump(model, model_path)

# Read it back so its parameters can be compared with the in-memory model.
mj = joblib.load(model_path)

# Use the restored model for a prediction and print its learned parameters.
query_1965 = pd.DataFrame({"year": [1965]})
restored_prediction = mj.predict(query_1965)
print("Prediction for 1965:", restored_prediction[0])
print("Coefficient:", mj.coef_)
print("Intercept:", mj.intercept_)

In [None]:
# Derive a binary target from income:
#   1 = income at or above the median, 0 = income below the median.
median_income = df['income'].median()
at_or_above_median = df['income'] >= median_income
df['income_class'] = np.where(at_or_above_median, 1, 0)

# Preview the frame with the new column.
df.head(n=5)


In [None]:
# Binary label as the target; the single 'year' column (kept 2-D) as the feature.
y = df['income_class']
X = df[['year']]

# Hold out 30% of the rows for evaluation. Stratifying on y keeps the class
# proportions similar in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.3,
)


In [None]:
# Fit a logistic-regression classifier on the training split.
# fit() returns the estimator itself, so construction and training are chained.
# NOTE(review): 'year' is passed unscaled — check whether the default solver
# emits a ConvergenceWarning here; confirm before relying on the fit.
clf = LogisticRegression().fit(X_train, y_train)

# Predicted class labels for the held-out rows.
y_pred = clf.predict(X_test)


In [None]:
# Fraction of test-set predictions that match the true labels.
acc = accuracy_score(y_test, y_pred)
print(f"Classification Accuracy: {acc:.3f}\n")

# Per-class precision, recall and F1 summary.
print("Classification Report:\n")
report = classification_report(y_test, y_pred)
print(report)


In [None]:
# Compute the confusion matrix. Passing labels=[0, 1] pins the row/column
# order to (class 0, class 1) so it always lines up with the two display
# labels below, and keeps the matrix 2x2 even if a class is absent from y_test.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Wrap the matrix for plotting with human-readable class names.
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Below Median", "Above Median"])

# Create the figure and axes explicitly and hand the axes to disp.plot().
# Without an `ax`, disp.plot() creates its own figure, so a separate
# plt.figure() call would leave a stray blank figure in the output and
# `fig` would not refer to the figure that holds the plot.
fig, ax = plt.subplots()
disp.plot(ax=ax, values_format="d")
ax.set_title("Confusion Matrix — Income Above/Below Median")
fig.tight_layout()
plt.show()


In [None]:
# Classify a few future years with the trained classifier.
future_years = pd.DataFrame({'year': [2030, 2040, 2050]})
future_class_pred = clf.predict(future_years)

# Print one line per year, translating the numeric class into a readable label.
for year, cls in zip(future_years['year'], future_class_pred):
    if cls == 1:
        label = "Above Median"
    else:
        label = "Below Median"
    print(f"{year}: {label}")
