In [None]:
%matplotlib inline
import json
import country_converter as coco
from datetime import datetime, timedelta
import requests
import pandas as pd
import geopandas as gpd
import numpy as np
import math
import matplotlib.pyplot as plt



# Load the survey results into a DataFrame. low_memory=False turns off
# pandas' chunked parsing, so each column's dtype is inferred from the
# whole file at once.
survey_df = pd.read_csv(
    '/home/jovyan/demo/Stack_overflow/survey_results_public.csv',
    delimiter=',',
    low_memory=False,
)


# Path to the countries shapefile
SHAPEFILE = '/home/jovyan/demo/Countries/ne_10m_admin_0_countries.shp'

# Load the shapefile with Geopandas, keep only the three columns used later
# and give them shorter names.
geo_df = gpd.read_file(SHAPEFILE)[['ADMIN', 'ADM0_A3', 'geometry']].rename(
    columns={'ADMIN': 'country', 'ADM0_A3': 'country_code'}
)
geo_df.head()


# Remove the 'Antarctica' row: it takes up a lot of space on the map and is
# not of much use here.
geo_df = geo_df.drop(index=geo_df.index[geo_df['country'] == 'Antarctica'])

# Draw the base map
geo_df.plot(color='lightblue', edgecolor='white', linewidth=1, figsize=(20, 20))



# Attach an ISO2 code to every country so that entries the converter cannot
# identify can be filtered out.
# NOTE: despite its name, `iso3_codes` holds the country *names* from the
# 'country' column; they are the converter's input, not its output.
iso3_codes = geo_df['country'].tolist()

# Convert the names to ISO2 codes; anything the converter cannot resolve is
# returned as the string 'NULL'.
iso2_codes_list = coco.convert(names=iso3_codes, to='ISO2', not_found='NULL')

# Store the ISO2 codes as a new column of the dataframe
geo_df['iso2_code'] = iso2_codes_list

# Drop the countries for which no country code was found.
geo_df = geo_df.drop(index=geo_df.index[geo_df['iso2_code'] == 'NULL'])

# Rename 'country' to 'Country' so the column name matches the survey data's
# 'Country' column.
geo_df = geo_df.rename(columns={'country': 'Country'})



# Keep only the survey columns used in the analysis and discard every
# response that is missing a value in any of them.
country_df = survey_df.loc[
    :, ['Respondent', 'Country', 'Salary', 'DevType', 'Gender']
].dropna()

# Count the remaining rows per country. count() tallies non-null cells per
# column; after the dropna above every column holds the same per-country total.
country_Groupby = country_df.groupby('Country').count()

# The ten countries with the most remaining responses, largest first.
top_10_countries = country_Groupby.sort_values(
    by=['Respondent', 'Country'], ascending=False
).head(10)



# 'Salary' is expected to be text that may contain ',' separators. Strip the
# commas literally (regex=False keeps this independent of the pandas default)
# and convert to numbers; values that still fail to parse become NaN.
country_df["Salary"] = pd.to_numeric(
    country_df["Salary"].str.replace(",", "", regex=False),
    errors="coerce",
)

# Move 'Country' out of the index into an ordinary column and keep only the
# respondent count next to it.
top_10_countries = top_10_countries.reset_index()
top_10_countries = top_10_countries.loc[:, ["Respondent", "Country"]]

# Mean salary for every (Country, DevType) pair, flattened into a table with
# the columns 'Country', 'DevType' and 'Salary'. mean() skips the NaN values
# produced by the conversion above.
avg_salary = (
    country_df.groupby(by=["Country", "DevType"])["Salary"].mean().reset_index()
)

# Restrict the averages to the top-10 countries (inner join on 'Country').
avg_merged_df = pd.merge(avg_salary, top_10_countries, on="Country")

# NOTE(review): 'Country' is the primary sort key, so the ten rows kept are
# the best-paid developer types of the alphabetically last countries, not the
# ten highest averages overall. Confirm this is intended.
avg_merged_df = avg_merged_df.sort_values(
    ["Country", "Salary"], ascending=False
).head(10)

# Notebook display of the result.
avg_merged_df.head(10)






# Make sure 'Country' is an ordinary column (if it is already one,
# reset_index only adds a throwaway 'index' column that the selection below
# discards) and keep just the count and the country name.
top_10_countries = top_10_countries.reset_index()
top_10_countries = top_10_countries.loc[:, ['Respondent', 'Country']]

# Map the survey's country names to alternative spellings, presumably the
# ones used in geo_df so that a later merge on 'Country' finds them.
# Series.replace with a dict swaps whole values only, so the step is safe to
# re-run and never rewrites part of a longer country name.
top_10_countries['Country'] = top_10_countries['Country'].replace({
    'United States': 'United States of America',
    'Russian Federation': 'Russia',
})


# Bar chart of respondent counts, largest first, with one bar per country
# labelled by its name rather than by its row number.
top_10_countries.sort_values(
    ['Respondent', 'Country'], ascending=False
).head(10).plot.bar(x='Country', y='Respondent')




# Inner-join the country geometries with the respondent counts on 'Country';
# only countries present in both tables are kept.
merged_df = pd.merge(
    left=geo_df,
    right=top_10_countries,
    how='inner',
    on='Country',
)
merged_df.head(10)





# Draw a choropleth map of the respondent counts per country.
title = 'Number Of Respondents Per Country'
col = 'Respondent'
source = ''  # text for the data-source annotation; empty here

# Colour-scale limits, shared by the map and the colorbar below.
vmin = merged_df[col].min()
vmax = merged_df[col].max()
cmap = 'viridis'

# Create figure and axes for Matplotlib
fig, ax = plt.subplots(1, figsize=(20, 8))

# Hide the axis; only the map itself is shown.
ax.axis('off')

# Pass vmin/vmax explicitly so the map uses the same normalisation as the
# colorbar created further down.
merged_df.plot(
    column=col,
    ax=ax,
    edgecolor='0.8',
    linewidth=1,
    cmap=cmap,
    vmin=vmin,
    vmax=vmax,
)


# Add a title
ax.set_title(title, fontdict={'fontsize': '25', 'fontweight': '3'})

# Create an annotation for the data source
ax.annotate(source, xy=(0.1, .08), xycoords='figure fraction', horizontalalignment='left',
            verticalalignment='bottom', fontsize=10)

# Build a standalone mappable with the same colormap and limits as the map;
# it exists only to drive the colorbar.
sm = plt.cm.ScalarMappable(norm=plt.Normalize(vmin=vmin, vmax=vmax), cmap=cmap)

# Give the mappable an empty data array through the public API rather than by
# writing to the private `_A` attribute.
sm.set_array([])

# Add the colorbar to the figure in its own narrow axes
# ([left, bottom, width, height] in figure fractions).
cbaxes = fig.add_axes([0.15, 0.25, 0.01, 0.4])
cbar = fig.colorbar(sm, cax=cbaxes)