In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import hvplot.pandas

# Read the employment table of every province (all ten, not only
# Newfoundland and Prince Edward Island) from its CSV file.
# NOTE(review): the PEI file name is spelled 'Emplyoment'; it is kept as-is
# on the assumption that it matches the file on disk - confirm.
(
    df_nl, df_pei, df_ns, df_nb, df_qc,
    df_ont, df_mb, df_sask, df_ab, df_bc,
) = map(pd.read_csv, (
    'Newfoundland Employment.csv',
    'Prince Edward Island Emplyoment.csv',
    'Nova Scotia Employment.csv',
    'New Brunswick Employment.csv',
    'Quebec Employment.csv',
    'Ontario Employment.csv',
    'Manitoba Employment.csv',
    'Saskatchewan Employment.csv',
    'Alberta Employment.csv',
    'British Columbia Employment.csv',
))

# Alberta and British Columbia only: discard columns that hold no values at all
df_ab, df_bc = (frame.dropna(axis=1, how='all') for frame in (df_ab, df_bc))

# Function to preprocess data, train the model, and predict employment
def predict_employment(df, province_name):
    """Fit a linear Year -> total-employment trend and forecast 2024-2029.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a 'Year' column and a 'Total, all industries ' column
        (the column name ends with a space). Totals may be strings that use
        ',' as a thousands separator. The frame is left unmodified.
    province_name : str
        Label written to the 'Province' column of the result.

    Returns
    -------
    pandas.DataFrame
        One row per year 2024..2029 with the columns 'Year',
        'Predicted Employment' (rounded to one decimal), 'Province' and
        'Percentage Growth'. The growth is the percentage change from the
        2024 prediction to the 2029 prediction, repeated on every row.
    """
    target_col = 'Total, all industries '

    # Clean the target into a new Series instead of writing it back into
    # `df`, so the caller's frame is not altered as a side effect.
    y = df[target_col].replace(',', '', regex=True).astype(float)
    X = df[['Year']]

    # Keep the 80/20 split with a fixed seed: the model is fitted on the
    # training part only. The held-out part is never scored, so it is discarded.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=0)

    model = LinearRegression()
    model.fit(X_train, y_train)

    # Forecast horizon: 2024 through 2029 inclusive (range end is exclusive)
    future_years = pd.DataFrame({'Year': range(2024, 2030)})
    future_predictions = model.predict(future_years[['Year']])

    # Relative change between the first and the last forecast year, in percent
    initial_value = future_predictions[0]
    final_value = future_predictions[-1]
    percentage_growth = ((final_value - initial_value) / initial_value) * 100

    future_years['Predicted Employment'] = future_predictions.round(1)
    future_years['Province'] = province_name
    future_years['Percentage Growth'] = percentage_growth

    return future_years

# Forecast each province with predict_employment (one result frame per province)
predicted_nl = predict_employment(df_nl, 'Newfoundland')
predicted_pei = predict_employment(df_pei, 'Prince Edward Island')
# Label fixed: it used to be 'Nova Scotia ' with a trailing space, which
# ended up verbatim in the 'Province' column and the plot legend.
predicted_ns = predict_employment(df_ns, 'Nova Scotia')
predicted_nb = predict_employment(df_nb, 'New Brunswick')
predicted_qc = predict_employment(df_qc, 'Quebec')
predicted_ont = predict_employment(df_ont, 'Ontario')
predicted_mb = predict_employment(df_mb, 'Manitoba')
predicted_sask = predict_employment(df_sask, 'Saskatchewan')
predicted_ab = predict_employment(df_ab, 'Alberta')
predicted_bc = predict_employment(df_bc, 'British Columbia')

# Stack the per-province forecasts into one long frame (one row per province-year)
predicted_combined = pd.concat([
    predicted_nl,
    predicted_pei,
    predicted_ns,
    predicted_nb,
    predicted_qc,
    predicted_ont,
    predicted_mb,
    predicted_sask,
    predicted_ab,
    predicted_bc,
])

# One line per province on a shared chart; the hover tooltip also shows the
# 'Percentage Growth' column returned by predict_employment.
plot_combined = predicted_combined.hvplot.line(
    x='Year',
    y='Predicted Employment',
    by='Province',
    title='Predicted Employment for Next 5 Years',
    hover_cols=['Year', 'Predicted Employment', 'Percentage Growth'],
    hover=True
).opts(xlabel='Year', ylabel='Predicted Employment')

# Last expression of the cell: renders the combined plot
plot_combined


In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Reload the employment table of all ten provinces from disk; this rebinds
# every df_* name, replacing whatever an earlier cell stored there.
# NOTE(review): the PEI file name is spelled 'Emplyoment'; it is kept as-is
# on the assumption that it matches the file on disk - confirm.
(
    df_nl, df_pei, df_ns, df_nb, df_qc,
    df_ont, df_mb, df_sask, df_ab, df_bc,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        'Newfoundland Employment.csv',
        'Prince Edward Island Emplyoment.csv',
        'Nova Scotia Employment.csv',
        'New Brunswick Employment.csv',
        'Quebec Employment.csv',
        'Ontario Employment.csv',
        'Manitoba Employment.csv',
        'Saskatchewan Employment.csv',
        'Alberta Employment.csv',
        'British Columbia Employment.csv',
    )
)

# Alberta and British Columbia only: drop columns in which every value is missing
df_ab, df_bc = (frame.dropna(axis=1, how='all') for frame in (df_ab, df_bc))

# Function to preprocess data, train the model, and predict employment
def predict_employment(df, province_name):
    """Fit a linear Year -> total-employment trend and forecast 2024-2029.

    NOTE: this redefines the `predict_employment` of the earlier cell. Unlike
    that version, the result here has no 'Percentage Growth' column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a 'Year' column and a 'Total, all industries ' column
        (the column name ends with a space). Totals may be strings that use
        ',' as a thousands separator. The frame is left unmodified.
    province_name : str
        Label written to the 'Province' column of the result.

    Returns
    -------
    pandas.DataFrame
        One row per year 2024..2029 with the columns 'Year',
        'Predicted Employment' (rounded to one decimal) and 'Province'.
    """
    target_col = 'Total, all industries '

    # Clean the target into a new Series instead of writing it back into
    # `df`, so the caller's frame is not altered as a side effect.
    y = df[target_col].replace(',', '', regex=True).astype(float)
    X = df[['Year']]

    # Keep the 80/20 split with a fixed seed: the model is fitted on the
    # training part only. The held-out part is never scored, so it is discarded.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=0)

    model = LinearRegression()
    model.fit(X_train, y_train)

    # Forecast horizon: 2024 through 2029 inclusive (range end is exclusive)
    future_years = pd.DataFrame({'Year': range(2024, 2030)})
    future_predictions = model.predict(future_years[['Year']])

    future_years['Predicted Employment'] = future_predictions.round(1)
    future_years['Province'] = province_name

    return future_years

# Forecast each province with predict_employment (one result frame per province)
predicted_nl = predict_employment(df_nl, 'Newfoundland')
predicted_pei = predict_employment(df_pei, 'Prince Edward Island')
# Label fixed: it used to be 'Nova Scotia ' with a trailing space, which
# ended up verbatim in the exported CSV and the plot legend.
predicted_ns = predict_employment(df_ns, 'Nova Scotia')
predicted_nb = predict_employment(df_nb, 'New Brunswick')
predicted_qc = predict_employment(df_qc, 'Quebec')
predicted_ont = predict_employment(df_ont, 'Ontario')
predicted_mb = predict_employment(df_mb, 'Manitoba')
predicted_sask = predict_employment(df_sask, 'Saskatchewan')
predicted_ab = predict_employment(df_ab, 'Alberta')
predicted_bc = predict_employment(df_bc, 'British Columbia')

# Stack the per-province forecasts into one long frame (one row per province-year)
predicted_combined = pd.concat([
    predicted_nl,
    predicted_pei,
    predicted_ns,
    predicted_nb,
    predicted_qc,
    predicted_ont,
    predicted_mb,
    predicted_sask,
    predicted_ab,
    predicted_bc,
])

# Export the combined forecast without the (repeating) row index
predicted_combined.to_csv('predicted_employment_provinces.csv', index=False)

# One line per province on a shared chart.
# - The .hvplot accessor is registered by `import hvplot.pandas`, which this
#   cell does not import itself; it relies on the earlier cell having run.
# - Only hover columns that actually exist are requested: the
#   predict_employment defined in this cell returns no 'Percentage Growth'
#   column, and asking for a missing column is expected to break the plot call.
plot_combined = predicted_combined.hvplot.line(
    x='Year',
    y='Predicted Employment',
    by='Province',
    title='Predicted Employment for Next 5 Years',
    hover_cols=[
        column
        for column in ('Year', 'Predicted Employment', 'Percentage Growth')
        if column in predicted_combined.columns
    ],
    hover=True
).opts(xlabel='Year', ylabel='Predicted Employment')

# Last expression of the cell: renders the combined plot
plot_combined


