In [None]:
import os
import sys
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.ticker import FuncFormatter
# Suppress every warning category from here on. Note that this also hides
# deprecation notices emitted by pandas / seaborn / matplotlib.
warnings.filterwarnings("ignore")

In [None]:
# Read the cleaned results CSV into `df` and preview its first ten rows.
# NOTE(review): the path is absolute and points at one user's Windows desktop,
# so this cell only runs on a machine where that exact file exists.
df = pd.read_csv(filepath_or_buffer=r"C:\Users\Casper Ruta\Desktop\csj-results-cleaned.csv")
df.head(n=10)

In [None]:
# Strip "$" signs and "," separators from the money columns, then convert them
# to float so they can be sorted, summed and plotted numerically.
# The pattern is a raw string so that "\$" reaches the regex engine unchanged
# instead of being an invalid string escape sequence.
# (The original cell converted 'Total_Amount_Paid' twice; the second pass was a no-op.)
for money_col in ('Cost Per Job', 'Total_Amount_Paid'):
    df[money_col] = df[money_col].replace(r'[\$,]', '', regex=True).astype(float)

In [None]:
# Bar chart of the ten rows with the highest 'Cost Per Job', labelled by constituency.
top10 = df.nlargest(n=10, columns='Cost Per Job')
top10_ax = sns.barplot(x='Cost Per Job', y='Constituency', data=top10, palette='viridis')
top10_ax.set_title('Least Bang for Your Bucks')
top10_ax.figure.tight_layout()
plt.show()

In [None]:
# Bar chart of the ten rows with the lowest 'Cost Per Job', labelled by constituency.
bottom10 = df.nsmallest(n=10, columns='Cost Per Job')
bottom10_ax = sns.barplot(x='Cost Per Job', y='Constituency', data=bottom10, palette='viridis')
bottom10_ax.set_title('Best Bang for Your Bucks')
bottom10_ax.figure.tight_layout()
plt.show()

In [None]:
import numpy as np

# Side-by-side comparison: ten most expensive vs. ten least expensive rows by
# 'Cost Per Job'.

# Custom x ticks for the left panel: steps of 500 up to 2500, steps of 100 from
# 2500 to 3000, then steps of 100 from 3100 up to the column maximum.
custom_ticks = [500, 1000, 1500, 2000, 2500, 2600, 2700, 2800, 2900, 3000]
max_val = df['Cost Per Job'].max()
extra_ticks = list(range(3100, int(max_val) + 100, 100))
all_ticks = custom_ticks + extra_ticks

# The axes must exist before they are styled; the original cell touched `ax1`
# before this call, which fails on a fresh kernel.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
# Transparent figure background. This has to happen before plt.show() to affect
# the rendered output.
fig.patch.set_alpha(0)

# Left chart — Most expensive
top10 = df.nlargest(10, 'Cost Per Job')
sns.barplot(data=top10, x='Cost Per Job', y='Constituency', palette='coolwarm', ax=ax1)
ax1.set_title('Least Bang for Your Bucks')
# Apply the custom ticks after plotting so they are set on the populated axes.
ax1.set_xticks(all_ticks)
ax1.tick_params(axis='x', rotation=45)

# Right chart — Least expensive
bottom10 = df.nsmallest(10, 'Cost Per Job')
sns.barplot(data=bottom10, x='Cost Per Job', y='Constituency', palette='coolwarm', ax=ax2)
ax2.set_title('Best Bang for Your Bucks')

plt.tight_layout()
plt.subplots_adjust(wspace=0.5)
plt.show()


In [None]:
# Add a 'City' column holding the part of each constituency name that precedes
# its first comma (the whole name when there is no comma), then print it.
constituency_head = df['Constituency'].str.split(',', n=1).str.get(0)
df['City'] = constituency_head
print(df['City'])

In [None]:
# Filter all Edmonton constituencies (case-insensitive substring match).
# na=False treats a missing constituency name as "no match"; without it a NaN
# in the column yields a non-boolean mask and the row selection raises.
is_edmonton = df['Constituency'].str.contains('Edmonton', case=False, na=False)
edmonton = df[is_edmonton]

print(edmonton)
    

In [None]:
## Calculating the Totals for Edmonton & Compare it to Calgary
# Rows are assigned to a city by a case-insensitive substring match on the
# constituency name. na=False makes rows with a missing name count as
# "no match" instead of breaking the boolean mask.
edmonton = df[df['Constituency'].str.contains('Edmonton', case=False, na=False)]
calgary = df[df['Constituency'].str.contains('Calgary', case=False, na=False)]

# Calculate totals for each city
# assumes 'Total_Jobs_Created' is already numeric in the CSV — TODO confirm
# (only the two money columns are cleaned in this notebook).
city_frames = {'Edmonton': edmonton, 'Calgary': calgary}
cities_comparison = pd.DataFrame({
    'City': list(city_frames),
    'Total_Amount_Paid': [frame['Total_Amount_Paid'].sum() for frame in city_frames.values()],
    'Total_Jobs_Created': [frame['Total_Jobs_Created'].sum() for frame in city_frames.values()],
})
# City-level cost per job is total paid divided by total jobs, not the mean of
# the per-constituency 'Cost Per Job' values.
cities_comparison['Cost Per Job'] = (
    cities_comparison['Total_Amount_Paid'] / cities_comparison['Total_Jobs_Created']
)

print(cities_comparison)


In [None]:
# Three-panel Edmonton vs. Calgary comparison built from `cities_comparison`:
# total amount paid, total jobs created, and cost per job.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6))
fig.patch.set_alpha(0)

# (axes, column to plot, panel title, whether the y axis shows dollar amounts)
panels = [
    (ax1, 'Total_Amount_Paid', 'Total Amount Paid', True),    # Chart 1
    (ax2, 'Total_Jobs_Created', 'Total Jobs Created', False), # Chart 2
    (ax3, 'Cost Per Job', 'Cost Per Job', True),              # Chart 3
]

for panel_ax, column, title, is_currency in panels:
    sns.barplot(data=cities_comparison, x='City', y=column,
                palette=['steelblue', 'coral'], ax=panel_ax)
    panel_ax.set_title(title, fontname='Georgia', fontsize=14, fontweight='bold')
    if is_currency:
        # A fresh formatter per axis: formatter instances are bound to the axis
        # they are attached to, so they should not be shared between panels.
        panel_ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: f'${x:,.0f}'))
    # Transparent panel background, matching the transparent figure.
    panel_ax.patch.set_alpha(0)

plt.subplots_adjust(wspace=0.4)
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Load and clean data
# NOTE(review): absolute, machine-specific path; this cell re-reads the CSV and
# rebinds `df`, discarding any columns added to it by earlier cells.
df = pd.read_csv(r"C:\Users\Casper Ruta\Desktop\csj-results-cleaned.csv")
# Strip "$" and "," then convert to float. The pattern is a raw string so "\$"
# is passed to the regex engine rather than being an invalid string escape.
for money_col in ('Total_Amount_Paid', 'Cost Per Job'):
    df[money_col] = df[money_col].replace(r'[\$,]', '', regex=True).astype(float)

# Filter the three cities
# Case-insensitive substring match; na=False drops rows whose constituency name
# is missing instead of producing a NaN-containing mask that cannot be used to
# index the frame.
in_three_cities = df['Constituency'].str.contains('Calgary|Vancouver|Edmonton', case=False, na=False)
df_filtered = df[in_three_cities].copy()


def _city_of(constituency):
    """Return the city label for a constituency name.

    Checks for 'calgary', then 'vancouver' (case-insensitive); anything else is
    labelled 'Edmonton', which is safe only because the rows were pre-filtered
    to names containing one of the three cities.
    """
    name = constituency.lower()
    if 'calgary' in name:
        return 'Calgary'
    if 'vancouver' in name:
        return 'Vancouver'
    return 'Edmonton'


# Add City column
df_filtered['City'] = df_filtered['Constituency'].apply(_city_of)

# Strip plot of 'Cost Per Job' grouped by city: one marker per row of
# `df_filtered`, jittered horizontally inside its city column.
fig, ax = plt.subplots(figsize=(10, 8))

# Transparent backgrounds for both the figure and the plotting area.
for background in (fig.patch, ax.patch):
    background.set_alpha(0)

marker_style = dict(s=12, edgecolor='black', linewidth=0.5, jitter=True)
sns.stripplot(
    x='City',
    y='Cost Per Job',
    data=df_filtered,
    palette=['mediumseagreen', 'coral', 'steelblue'],
    ax=ax,
    **marker_style,
)

ax.set_title('Cost Per Job by Constituency', fontname='Arial', fontsize=16, fontweight='normal')
ax.set_ylabel('Cost Per Job')
# Show y tick labels as whole-dollar amounts with thousands separators.
ax.yaxis.set_major_formatter(FuncFormatter(lambda value, _pos: f'${value:,.0f}'))
plt.show()