In [1]:
import pandas as pd
import numpy as np
# Location of the 2020 JetBrains Python survey CSV in the mattharrison/datasets
# repository. The commented-out line is an alternate address kept for reference
# (presumably the same file served from the raw.githubusercontent.com host).
#url = 'https://raw.githubusercontent.com/mattharrison/datasets/master/data/2020-jetbrains-python-survey.csv'
url = 'https://github.com/mattharrison/datasets/raw/master/data/2020-jetbrains-python-survey.csv'
# Fetch and parse the CSV directly from the URL (requires network access).
df = pd.read_csv(filepath_or_buffer=url)

import collections
# Bucket the column names by a prefix key: the first two dot-separated parts
# when the name contains at least two periods, otherwise just the first part.
counter = collections.defaultdict(list)
for col in sorted(df.columns):
    pieces = col.split('.')
    # Two or more periods means three or more pieces after splitting.
    depth = 2 if len(pieces) >= 3 else 1
    counter['.'.join(pieces[:depth])].append(col)

# Keep only the columns that are alone in their bucket; columns that share a
# prefix with any other column are left out.
uniq_cols = [group[0] for group in counter.values() if len(group) == 1]
        
# Reduce the frame to the columns in `uniq_cols` and normalise their types in
# one chain. `assign` evaluates its lambdas in order against the frame built
# so far, which is why `team_size` can read the already-converted
# `company_size`.
df = (df
    [uniq_cols]
    # Swap dots for underscores so the columns can be read as attributes below.
    .rename(columns=lambda c: c.replace('.', '_'))
    .assign(
        # First two characters of the age label, stored as a nullable integer.
        age=lambda df_: df_.age.str.slice(0, 2).astype(float)
            .astype('Int64'),
        # Yes/No to booleans; a missing answer is treated as False.
        are_you_datascientist=lambda df_: df_.are_you_datascientist
            .replace({'Yes': True, 'No': False, np.nan: False}),
        # Each size label becomes the lower bound of its range;
        # 'Not sure' becomes missing.
        company_size=lambda df_: df_.company_size.replace({
            'Just me': 1, 'Not sure': np.nan,
            'More than 5,000': 5000, '2–10': 2, '11–50':11,
            '51–500': 51, '501–1,000':501,
            '1,001–5,000':1001}).astype('Int64'),
        country_live=lambda df_: df_.country_live.astype('category'),
        # Missing employment status is folded into 'Other'.
        employment_status=lambda df_: df_.employment_status
            .fillna('Other').astype('category'),
        is_python_main=lambda df_: df_.is_python_main
            .astype('category'),
        # Text before the first '-' of the label; the open-ended top label is
        # mapped to 41. Rows whose company_size is 1 get a team size of 1.
        # NOTE(review): rows with a missing company_size may also end up as 1,
        # depending on how `where` treats NA in its condition -- confirm that
        # this is intended.
        team_size=lambda df_: df_.team_size
            .str.split(r'-', n=1, expand=True)
            .iloc[:, 0].replace('More than 40 people', 41)
            .where(df_.company_size != 1, 1).astype(float),
        # Leading integer of each label. 'Less than 1 year' is set to .5
        # *after* the extraction: `.str` methods yield NaN for non-string
        # elements, so substituting the float first would discard it.
        years_of_coding=lambda df_: df_.years_of_coding
            .str.extract(r'(\d+)', expand=False)
            .astype(float)
            .mask(df_.years_of_coding == 'Less than 1 year', .5),
        # Same conversion as years_of_coding.
        python_years=lambda df_: df_.python_years
            .str.extract(r'(\d+)', expand=False)
            .astype(float)
            .mask(df_.python_years == 'Less than 1 year', .5),
        # 'Python 3_7' -> 3.7; python3_version_most itself is kept as text.
        python3_ver=lambda df_: df_.python3_version_most
            .str.replace('_', '.').str.extract(r'(\d\.\d)', expand=False)
            .astype(float),
        use_python_most=lambda df_: df_.use_python_most
            .fillna('Unknown')
    )
    .drop(columns=['python2_version_most'])
)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [2]:
# Preview the first rows of the cleaned frame to sanity-check the conversions.
df.head()

Unnamed: 0,age,are_you_datascientist,company_size,country_live,employment_status,first_learn_about_main_ide,how_often_use_main_ide,ide_main,is_python_main,job_team,main_purposes,missing_features_main_ide,nps_main_ide,python_years,python3_version_most,several_projects,team_size,use_python_most,years_of_coding,python3_ver
0,30.0,False,1.0,,Partially employed by a company / organization,Conference / User Group,Weekly,PyCharm Community Edition,Yes,Work as an external consultant or trainer,For work,"No, it has all the features I need",3.0,3.0,Python 3_7,"Yes, I work on many different projects",1.0,Unknown,1.0,3.7
1,21.0,True,5000.0,India,Fully employed by a company / organization,School / University,Daily,VS Code,Yes,Work in a team,Both for work and personal,"No, it has all the features I need",8.0,3.0,Python 3_6,"Yes, I work on one main and several side projects",2.0,Software prototyping,3.0,3.6
2,30.0,False,5000.0,United States,Fully employed by a company / organization,Friend / Colleague,Daily,Vim,Yes,Work on your own project(s) independently,Both for work and personal,"No, it has all the features I need",10.0,3.0,Python 3_6,"Yes, I work on one main and several side projects",,DevOps / System administration / Writing autom...,3.0,3.6
3,,False,,,Other,Friend / Colleague,Daily,PyCharm Professional Edition,Yes,,Both for work and personal,Yes – Please list:,10.0,11.0,Python 3_8,"Yes, I work on many different projects",1.0,Web development,11.0,3.8
4,21.0,False,,Italy,Student,Search engines,Daily,VS Code,Yes,Work on your own project(s) independently,"For personal, educational or side projects","No, it has all the features I need",10.0,1.0,Python 3_8,"Yes, I work on one main and several side projects",1.0,Web development,,3.8


### Pivot with styling

In [4]:
# Cross-tabulate (country, age) against (main Python use, Python 3 version),
# keep the 'United States' rows and two of the use categories, then colour the
# cells with the viridis colormap; axis=None applies the gradient across the
# whole table rather than per row or per column.
tmp_df = (
    pd.crosstab(
        index=[df['country_live'], df['age']],
        columns=[df['use_python_most'], df['python3_version_most']],
    )
    .loc[['United States'], ['Data analysis', 'Web development']]
    .style
    .background_gradient(axis=None, cmap='viridis')
)

In [5]:
# Display the styled crosstab currently bound to tmp_df.
tmp_df

Unnamed: 0_level_0,use_python_most,Data analysis,Data analysis,Data analysis,Data analysis,Data analysis,Web development,Web development,Web development,Web development,Web development
Unnamed: 0_level_1,python3_version_most,Python 3_5 or lower,Python 3_6,Python 3_7,Python 3_8,Python 3_9,Python 3_5 or lower,Python 3_6,Python 3_7,Python 3_8,Python 3_9
country_live,age,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
United States,18,0,1,5,10,4,0,3,3,11,0
United States,21,2,25,74,100,21,3,33,64,103,7
United States,30,3,34,83,116,17,3,65,90,143,14
United States,40,0,18,39,57,5,1,16,34,73,10
United States,50,2,7,33,47,2,0,11,14,21,2
United States,60,1,2,12,14,3,0,3,8,8,1


In [8]:
# Same (country, age) x (main Python use, Python 3 version) crosstab limited to
# the 'United States' rows, this time drawn with in-cell bars: light green for
# the 'Data analysis' columns and blue for the 'Web development' columns.
tmp_df = (
    pd.crosstab(
        index=[df['country_live'], df['age']],
        columns=[df['use_python_most'], df['python3_version_most']],
    )
    .loc[['United States'], ['Data analysis', 'Web development']]
    .style
    .bar(color='lightgreen', subset=['Data analysis'])
    .bar(color='blue', subset=['Web development'])
)

In [9]:
# Display the bar-styled crosstab currently bound to tmp_df.
tmp_df

Unnamed: 0_level_0,use_python_most,Data analysis,Data analysis,Data analysis,Data analysis,Data analysis,Web development,Web development,Web development,Web development,Web development
Unnamed: 0_level_1,python3_version_most,Python 3_5 or lower,Python 3_6,Python 3_7,Python 3_8,Python 3_9,Python 3_5 or lower,Python 3_6,Python 3_7,Python 3_8,Python 3_9
country_live,age,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
United States,18,0,1,5,10,4,0,3,3,11,0
United States,21,2,25,74,100,21,3,33,64,103,7
United States,30,3,34,83,116,17,3,65,90,143,14
United States,40,0,18,39,57,5,1,16,34,73,10
United States,50,2,7,33,47,2,0,11,14,21,2
United States,60,1,2,12,14,3,0,3,8,8,1
