In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
import hvplot.pandas  # Import hvplot

# Read the Newfoundland sector employment data and, in the same step,
# discard every row that contains at least one missing value.
df_nl = pd.read_csv('Newfoundland Sectors.csv').dropna()

In [2]:
# Model inputs are the calendar year and the industry label; the quantity
# being modelled is the employment figure.
feature_columns = ['Year', 'Industry']
target_column = 'Employment'
X = df_nl[feature_columns]
y = df_nl[target_column]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)


In [3]:
# One-hot encode the 'Industry' column and pass 'Year' through unchanged.
#
# * The column is selected by name rather than by position, so the encoding
#   no longer silently depends on the column order of X.
# * handle_unknown='ignore' makes an industry that was not seen during
#   fit (e.g. one that landed only in the test split) encode as all zeros
#   instead of raising when the test set or the forecast grid is transformed.
ct = ColumnTransformer(
    transformers=[
        ('encoder', OneHotEncoder(handle_unknown='ignore'), ['Industry']),
    ],
    remainder='passthrough',
)

# Fit the encoder on the training rows only, then reuse the fitted
# transformer for the held-out rows.
X_train_encoded = ct.fit_transform(X_train)
X_test_encoded = ct.transform(X_test)

In [4]:
# Build an ordinary least-squares regressor and fit it on the encoded
# training data in one chained call (fit returns the estimator itself).
model = LinearRegression().fit(X_train_encoded, y_train)
model


In [5]:
# Forecast grid: every year from 2024 through 2029 paired with every
# distinct industry found in the cleaned data (years vary slowest).
years = range(2024, 2030)
industries = df_nl['Industry'].unique()
year_industry_pairs = [
    (forecast_year, sector)
    for forecast_year in years
    for sector in industries
]
future_years = pd.DataFrame(year_industry_pairs, columns=['Year', 'Industry'])

In [6]:
# Select only the two model inputs (so any extra columns on future_years
# are ignored), then apply the already-fitted column transformer.
future_features = future_years[['Year', 'Industry']]
future_years_encoded = ct.transform(future_features)


In [7]:
# Predict employment for every encoded (Year, Industry) row of the forecast
# grid. Note: the grid is built from range(2024, 2030), i.e. the six years
# 2024-2029, not five.
future_predictions = model.predict(future_years_encoded)


In [8]:
# Attach the model's predictions, rounded to one decimal place, as a new
# column next to the Year/Industry pair each prediction belongs to.
prediction_column = 'Predicted Employment Newfoundland'
future_years = future_years.assign(
    **{prediction_column: future_predictions.round(1)}
)
future_years

Unnamed: 0,Year,Industry,Predicted Employment Newfoundland
0,2024,Goods-producing sector,53.8
1,2024,Agriculture,5.2
2,2024,"Forestry, fishing, mining, quarrying, oil and gas",19.9
3,2024,Forestry and logging and support activities fo...,5.1
4,2024,"Fishing, hunting and trapping",9.4
...,...,...,...
157,2029,Health care and social assistance,34.9
158,2029,"Information, culture and recreation",11.5
159,2029,Accommodation and food services,16.5
160,2029,Other services (except public administration),15.1


In [13]:
# Write the forecast table (year, industry, predicted employment) to disk,
# leaving the DataFrame index out of the file.
output_path = 'predicted_Newfoundland_employment_sector.csv'
future_years.to_csv(output_path, index=False)

In [10]:
# Interactive hvplot line chart of predicted employment over time,
# drawing one line per industry.
plot = future_years.hvplot(
    x='Year',
    y='Predicted Employment Newfoundland',
    by='Industry',
    kind='line',
    xlabel='Year',
    ylabel='Predicted Employment',
    title='Newfoundland Predicted Employment by Industry',
)
plot