In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load data
df_nl = pd.read_csv('CPI Newfoundland and Labrador.csv')
df_pei = pd.read_csv('CPI PEI.csv')
df_ns = pd.read_csv('CPI Nova Scotia.csv')
df_nb = pd.read_csv('CPI New Brunswick.csv')
df_qc = pd.read_csv('CPI Quebec.csv')
df_ont = pd.read_csv('CPI Ontario.csv')
df_mb = pd.read_csv('CPI Manitoba.csv')
df_sask = pd.read_csv('CPI Saskatchewan.csv')
df_ab = pd.read_csv('CPI Alberta.csv')
df_bc = pd.read_csv('CPI British Columbia.csv')
df_yk = pd.read_csv('CPI Yellowknife, Northwest Territories.csv')
df_wh = pd.read_csv('CPI Whitehorse, Yukon.csv')

# Function to preprocess data, train the model, and predict employment
def predict_cpi(df, province_name):
    df = df.dropna(axis=1, how='all')  # Clean empty columns
    product_columns = df['Products'].unique()  # Working with unique product names
    
    predictions_df = pd.DataFrame({'Year': range(2024, 2030), 'Province': [province_name]*6})
    
    for product in product_columns:
        df_product = df[df['Products'] == product].copy()
        print(f"Processing {product}, Entries: {len(df_product)}")  # Debug: check entries count

        if df_product.empty:
            print(f"No data available for product {product}. Skipping...")
            continue

        # Preprocessing to ensure all entries are numeric
        df_product['CPI'] = pd.to_numeric(df_product['CPI'], errors='coerce')
        df_product = df_product.dropna(subset=['CPI'])

        X = df_product[['Year']]
        y = df_product['CPI']

        if len(X) < 5:  # Ensure there are enough entries for a train/test split
            print(f"Not enough data to train for {product}. Needs more than 5, has {len(X)}")
            continue

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
        
        model = LinearRegression()
        model.fit(X_train, y_train)
        
        predictions = model.predict(pd.DataFrame({'Year': range(2024, 2030)}))
        predictions_df[product] = predictions.round(2)  # Predictions rounded to 2 decimal places
    
    return predictions_df

# Predict employment for each province
predicted_nl = predict_cpi(df_nl, 'Newfoundland')
predicted_pei = predict_cpi(df_pei, 'Prince Edward Island')
predicted_ns = predict_cpi(df_ns, 'Nova Scotia')
predicted_nb = predict_cpi(df_nb, 'New Brunswick')
predicted_qc = predict_cpi(df_qc, 'Quebec')
predicted_ont = predict_cpi(df_ont, 'Ontario')
predicted_mb = predict_cpi(df_mb, 'Manitoba')
predicted_sask = predict_cpi(df_sask, 'Saskatchewan')
predicted_ab = predict_cpi(df_ab, 'Alberta')
predicted_bc = predict_cpi(df_bc, 'British Columbia')
predicted_nt = predict_cpi(df_ab, 'Northwest Territories')
predicted_yk = predict_cpi(df_bc, 'Yukon')
# Combine all predictions into one DataFrame
predicted_combined = pd.concat([predicted_nl, predicted_pei, predicted_ns, predicted_nb, predicted_qc, predicted_ont, predicted_mb, predicted_sask, predicted_ab, predicted_bc,predicted_nt,predicted_yk], ignore_index=True)

predicted_combined.head()

Processing All-items, Entries: 45
Processing nan, Entries: 0
No data available for product nan. Skipping...
Processing Food , Entries: 45
Processing Shelter , Entries: 45
Processing Household operations, furnishings and equipment, Entries: 45
Processing Clothing and footwear, Entries: 45
Processing Transportation, Entries: 45
Processing Gasoline, Entries: 45
Processing Health and personal care, Entries: 45
Processing Recreation, education and reading, Entries: 45
Processing Alcoholic beverages, tobacco products and recreational cannabis, Entries: 45
Processing All-items excluding food and energy , Entries: 45
Processing All-items excluding energy , Entries: 45
Processing Energy , Entries: 45
Processing Goods , Entries: 45
Processing Services, Entries: 45
Processing All-items, Entries: 45
Processing nan, Entries: 0
No data available for product nan. Skipping...
Processing Food , Entries: 45
Processing Shelter , Entries: 45
Processing Household operations, furnishings and equipment, Entr

Processing All-items excluding energy , Entries: 45
Processing Energy , Entries: 45
Processing Goods , Entries: 45
Processing Services, Entries: 45
Processing All-items, Entries: 45
Processing nan, Entries: 0
No data available for product nan. Skipping...
Processing Food , Entries: 45
Processing Shelter , Entries: 45
Processing Household operations, furnishings and equipment, Entries: 45
Processing Clothing and footwear, Entries: 45
Processing Transportation, Entries: 45
Processing Gasoline, Entries: 45
Processing Health and personal care, Entries: 45
Processing Recreation, education and reading, Entries: 45
Processing Alcoholic beverages, tobacco products and recreational cannabis, Entries: 45
Processing All-items excluding food and energy , Entries: 45
Processing All-items excluding energy , Entries: 45
Processing Energy , Entries: 45
Processing Goods , Entries: 45
Processing Services, Entries: 45


Unnamed: 0,Year,Province,All-items,Food,Shelter,"Household operations, furnishings and equipment",Clothing and footwear,Transportation,Gasoline,Health and personal care,"Recreation, education and reading","Alcoholic beverages, tobacco products and recreational cannabis",All-items excluding food and energy,All-items excluding energy,Energy,Goods,Services
0,2024,Newfoundland,152.23,161.59,176.32,128.25,106.08,160.54,196.16,134.27,124.47,199.53,142.25,145.98,202.09,144.76,163.44
1,2025,Newfoundland,154.48,164.01,179.2,129.6,106.78,163.25,199.72,135.88,126.0,203.49,144.23,148.04,205.8,146.69,166.17
2,2026,Newfoundland,156.73,166.44,182.08,130.94,107.47,165.96,203.29,137.49,127.53,207.45,146.21,150.11,209.51,148.63,168.89
3,2027,Newfoundland,158.98,168.86,184.96,132.28,108.16,168.67,206.85,139.1,129.06,211.41,148.19,152.17,213.22,150.56,171.62
4,2028,Newfoundland,161.23,171.29,187.84,133.62,108.85,171.37,210.42,140.71,130.59,215.37,150.17,154.23,216.92,152.49,174.34


In [5]:
predicted_combined.head(10)

Unnamed: 0,Year,Province,All-items,Food,Shelter,"Household operations, furnishings and equipment",Clothing and footwear,Transportation,Gasoline,Health and personal care,"Recreation, education and reading","Alcoholic beverages, tobacco products and recreational cannabis",All-items excluding food and energy,All-items excluding energy,Energy,Goods,Services
0,2024,Newfoundland,152.23,161.59,176.32,128.25,106.08,160.54,196.16,134.27,124.47,199.53,142.25,145.98,202.09,144.76,163.44
1,2025,Newfoundland,154.48,164.01,179.2,129.6,106.78,163.25,199.72,135.88,126.0,203.49,144.23,148.04,205.8,146.69,166.17
2,2026,Newfoundland,156.73,166.44,182.08,130.94,107.47,165.96,203.29,137.49,127.53,207.45,146.21,150.11,209.51,148.63,168.89
3,2027,Newfoundland,158.98,168.86,184.96,132.28,108.16,168.67,206.85,139.1,129.06,211.41,148.19,152.17,213.22,150.56,171.62
4,2028,Newfoundland,161.23,171.29,187.84,133.62,108.85,171.37,210.42,140.71,130.59,215.37,150.17,154.23,216.92,152.49,174.34
5,2029,Newfoundland,163.48,173.71,190.72,134.96,109.54,174.08,213.98,142.32,132.12,219.33,152.15,156.3,220.63,154.43,177.07
6,2024,Prince Edward Island,153.65,169.71,157.37,141.0,111.55,158.62,210.56,142.77,132.75,217.04,141.16,146.46,215.84,154.71,152.05
7,2025,Prince Edward Island,155.97,172.46,159.63,142.9,112.44,161.19,214.42,144.82,134.56,221.75,143.14,148.58,219.84,157.01,154.39
8,2026,Prince Edward Island,158.29,175.22,161.89,144.8,113.34,163.75,218.27,146.86,136.37,226.47,145.12,150.7,223.84,159.31,156.73
9,2027,Prince Edward Island,160.61,177.97,164.16,146.69,114.24,166.31,222.12,148.91,138.19,231.18,147.11,152.83,227.85,161.61,159.07


In [6]:
predicted_combined.to_csv('Predicted_CPI_Growth.csv', index=False)