# D-Tale

In [5]:
import dtale
import pandas as pd

# Prepared survey responses explored in this notebook.
df = pd.read_csv(filepath_or_buffer='../../data/prep_survey_profile.csv')

# Open the frame in D-Tale on a fixed port, then echo the address to visit.
d = dtale.show(df, port=4000)
print(d._main_url)

http://S340-PAD:4000/dtale/main/1


In [102]:
# End the D-Tale session held in `d` (the instance returned by dtale.show);
# the cell output below logs "Executing shutdown..." when this runs.
d.kill()

2024-12-18 17:35:19,558 - INFO     - Executing shutdown...
2024-12-18 17:35:19,564 - INFO     - Not running with the Werkzeug Server, exiting by searching gc for BaseWSGIServer


# Univariate Analysis, Bivariate Analysis, Multivariate Analysis

In [None]:
# What is your age (# years)? -->'25-29','30-34','22-24','35-39','40-44','45-49','50-54','18-21','55-59','60-69','70+'
# What is your gender? -->'Male','Female'
# In which country do you currently reside? --> (List of countries)
# What is the highest level of formal education that you have attained or plan to attain within the next 2 years? -->'Master's Degree','Bachelor's Degree','Doctoral Degree','Professional Degree','Some college university study without earning a bachelor's degree','I prefer not to answer','No formal education past high school'
# Select the title most similar to your current role (or most recent title if retired) -->'Data Scientist','Software Engineer','Research Scientist','Data Analyst','Other','Product/Project Manager','Data Engineer','Business Analyst','Statistician','DBA/Database Engineer'
# What is the size of the company where you are employed? -->'Small','Enterprise','Large','Medium','Mid-Large'
# Approximately how many individuals are responsible for data science workloads at your place of business? -->'20+','1-2','3-4','5-9','0','10-14','15-19'
# Does your current employer incorporate machine learning methods into their business? -->'I do not know','No (we do not use ML methods)','We are exploring ML methods (and may one day put a model into production)','We recently started using ML methods (i.e., models in production for less than 2 years)','We use ML methods for generating insights (but do not put working models into production)','We have well established ML methods (i.e., models in production for more than 2 years)'
# What is your current yearly compensation (approximate $USD)? --> '$0-999','$100,000-124,999','40,000-49,999','125,000-149,999','10,000-14,999','30,000-39,999','50,000-59,999','20,000-24,999','150,000-199,999','60,000-69,999','70,000-79,999','90,000-99,999','25,000-29,999','15,000-19,999','80,000-89,999','5,000-7,499','1,000-1,9999','4,000-4,999','7,500-7,999','200,000-249,999','2,000-2,999','3,000-3,999','250,000-299,999','300,000-500,000','> $500,000'
# Approximately how much money have you spent on machine learning and/or cloud computing products at your work in the past 5 years? -->'$0 (USD)','$100-$999','$1000-$9,999','$1-$999','$10,000-$99,999','>$100,000 ($USD)'
# What is the primary tool that you use at work or school to analyze data? -->'Local development environments','Basic statistical software','Cloud-based data software & APIs','Other','Business intelligence software','Advanced statistical software'
# How long have you been writing code to analyze data (at work or at school)? -->'3-5 years','1-2 years','5-10 years','< 1 years','10-20 years','20+ years'
# What programming language would you recommend an aspiring data scientist to learn first? -->'Python','R','SQL','Other'
# Have you ever used a TPU (tensor processing unit)? -->'Never','Used TPU'
# For how many years have you used machine learning methods? -->'1-2 years','< 1 years','2-3 years','3-4 years','4-5 years','5-10 years','10-15 years','20+ years'

### Continuous Variables (recorded as binned ranges)

| Column Name       | Description                                                                         |
|-------------------|-------------------------------------------------------------------------------------|
| **Age**           | What is your age (# years)?                                                         |
| **Salary**        | What is your current yearly compensation (approximate $USD)?                        |
| **ML_Cloud_Spend**| Approximately how much money have you spent on machine learning and/or cloud computing products at your work in the past 5 years? |
| **Coding_Years**  | How long have you been writing code to analyze data (at work or at school)?         |
| **ML_Years**      | For how many years have you used machine learning methods?                          |

### Categorical Variables

| Column Name     | Description                                                                       |
|-----------------|-----------------------------------------------------------------------------------|
| **Gender**      | What is your gender?                                                              |
| **Country**     | In which country do you currently reside?                                         |
| **Education**   | What is the highest level of formal education that you have attained or plan to attain within the next 2 years? |
| **Job_Title**   | Select the title most similar to your current role (or most recent title if retired) |
| **Company_Size**| What is the size of the company where you are employed?                           |
| **DS_Workload** | Approximately how many individuals are responsible for data science workloads at your place of business? |
| **ML_Use**      | Does your current employer incorporate machine learning methods into their business? |
| **Primary_Tool**| What is the primary tool that you use at work or school to analyze data?          |
| **First_Lang**  | What programming language would you recommend an aspiring data scientist to learn first? |
| **TPU_Use**     | Have you ever used a TPU (tensor processing unit)?                                 |

## Univariate Analysis for All Variables

In [101]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

# Enable offline mode for Plotly in Jupyter
init_notebook_mode(connected=True)

# Survey question whose answers are charted.
SALARY_QUESTION = 'What is your current yearly compensation (approximate $USD)?'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Non-missing answers only.
s = df[SALARY_QUESTION].dropna()

# Frequency table: one row per answer with its count and its share of all
# non-missing answers. Series.value_counts() replaces the deprecated top-level
# pd.value_counts().
chart = s.value_counts().to_frame(name='data')
chart['percent'] = (chart['data'] / chart['data'].sum()) * 100
chart.index.name = 'labels'

# Most frequent answer first, ties broken by label. The percentages are computed
# before the 100-row cap, so they stay relative to the full total.
chart = chart.reset_index().sort_values(['data', 'labels'], ascending=[False, True])
chart = chart[:100]

# Create the bar chart with percentage text on top of bars
charts = [go.Bar(
    x=chart['labels'].values,
    y=chart['data'].values,
    name='Frequency',
    text=chart['percent'].apply(lambda x: f'{x:.2f}%').values,  # percentage label per bar
    textposition='outside'  # draw the label above the bar rather than inside it
)]

figure = go.Figure(data=charts, layout=go.Layout({
    'barmode': 'group',
    'legend': {'orientation': 'h'},
    'title': {'text': 'Yearly Compensation Distribution'},
    'xaxis': {'title': {'text': 'What is your current yearly compensation (approximate $USD)?'}},
    'yaxis': {'title': {'text': 'Frequency'}}
}))

iplot(figure)

In [54]:
import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

# Enable offline mode for Plotly in Jupyter
init_notebook_mode(connected=True)

# Survey question whose answer distribution is shown in the pie chart.
AGE_QUESTION = 'What is your age (# years)?'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Respondents per answer, ordered by answer label. dropna=True leaves out rows
# where the answer is missing; every remaining group holds at least one row, so
# no further NaN or zero-count filtering is needed before plotting.
chart_data = (
    df.groupby(AGE_QUESTION, dropna=True)
    .size()
    .rename('__index__||count')
    .rename_axis('x')
    .reset_index()
)

chart = go.Pie(labels=chart_data['x'], values=chart_data['__index__||count'])
figure = go.Figure(data=[chart], layout=go.Layout({
    'showlegend': True, 'title': {'text': 'Age Distribution'}
}))

iplot(figure)

In [69]:
# Pie chart of the gender answers. `df` is reloaded from the CSV below, so this
# cell does not depend on the frame that was passed to dtale.show.

import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

# Enable offline mode for Plotly in Jupyter
init_notebook_mode(connected=True)

# Survey question whose answer distribution is shown in the pie chart.
GENDER_QUESTION = 'What is your gender?'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Respondents per answer, ordered by answer label. dropna=True leaves out rows
# where the answer is missing; every remaining group holds at least one row, so
# no further NaN or zero-count filtering is needed before plotting.
chart_data = (
    df.groupby(GENDER_QUESTION, dropna=True)
    .size()
    .rename('__index__||count')
    .rename_axis('x')
    .reset_index()
)

chart = go.Pie(labels=chart_data['x'], values=chart_data['__index__||count'])
figure = go.Figure(data=[chart], layout=go.Layout({
    'legend': {'orientation': 'h', 'y': 0.5}, 'title': {'text': 'Gender Distribution'}
}))

iplot(figure)

In [61]:
# Bar chart of respondents per country. `df` is reloaded from the CSV below, so
# this cell does not depend on the frame that was passed to dtale.show.

import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)

# Survey question whose answer distribution is shown in the bar chart.
COUNTRY_QUESTION = 'In which country do you currently reside?'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Respondents per country, ordered by country name. dropna=True leaves out rows
# where the answer is missing.
chart_data = (
    df.groupby(COUNTRY_QUESTION, dropna=True)
    .size()
    .rename('__index__||count')
    .rename_axis('x')
    .reset_index()
)

# Single bar trace: one bar per country, bar height = respondent count.
charts = [go.Bar(
    x=chart_data['x'],
    y=chart_data['__index__||count']
)]
figure = go.Figure(data=charts, layout=go.Layout({
    'barmode': 'group',
    'legend': {'orientation': 'h', 'y': -0.3},
    'title': {'text': 'Country Distribution'},
    'xaxis': {'title': {'text': 'Country'}},
    # NOTE(review): '0:g' looks like a Python format spec rather than a d3-format
    # string; kept as exported by D-Tale -- confirm Plotly renders ticks as intended.
    'yaxis': {'tickformat': '0:g', 'title': {'text': 'Count'}, 'type': 'linear'}
}))

iplot(figure)

In [74]:
# Pie chart of the education answers. `df` is reloaded from the CSV below, so
# this cell does not depend on the frame that was passed to dtale.show.

import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)

# Survey question whose answer distribution is shown in the pie chart.
EDUCATION_QUESTION = 'What is the highest level of formal education that you have attained or plan to attain within the next 2 years?'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Respondents per answer, ordered by answer label. dropna=True leaves out rows
# where the answer is missing; every remaining group holds at least one row, so
# no further NaN or zero-count filtering is needed before plotting.
chart_data = (
    df.groupby(EDUCATION_QUESTION, dropna=True)
    .size()
    .rename('__index__||count')
    .rename_axis('x')
    .reset_index()
)

chart = go.Pie(labels=chart_data['x'], values=chart_data['__index__||count'])
figure = go.Figure(data=[chart], layout=go.Layout({
    'showlegend': True,
    'title': {'text': 'Education Distribution'}
}))

iplot(figure)

In [78]:
# Pie chart of the job-title answers. `df` is reloaded from the CSV below, so
# this cell does not depend on the frame that was passed to dtale.show.

import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)

# Survey question whose answer distribution is shown in the pie chart.
JOB_TITLE_QUESTION = 'Select the title most similar to your current role (or most recent title if retired)'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Respondents per answer, ordered by answer label. dropna=True leaves out rows
# where the answer is missing; every remaining group holds at least one row, so
# no further NaN or zero-count filtering is needed before plotting.
chart_data = (
    df.groupby(JOB_TITLE_QUESTION, dropna=True)
    .size()
    .rename('__index__||count')
    .rename_axis('x')
    .reset_index()
)

chart = go.Pie(labels=chart_data['x'], values=chart_data['__index__||count'])
figure = go.Figure(data=[chart], layout=go.Layout({
    'showlegend': True,
    'title': {'text': 'Job Title Distribution'}
}))

iplot(figure)

In [80]:
# Pie chart of the company-size answers. `df` is reloaded from the CSV below, so
# this cell does not depend on the frame that was passed to dtale.show.

import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)

# Survey question whose answer distribution is shown in the pie chart.
COMPANY_SIZE_QUESTION = 'What is the size of the company where you are employed?'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Respondents per answer, ordered by answer label. dropna=True leaves out rows
# where the answer is missing; every remaining group holds at least one row, so
# no further NaN or zero-count filtering is needed before plotting.
chart_data = (
    df.groupby(COMPANY_SIZE_QUESTION, dropna=True)
    .size()
    .rename('__index__||count')
    .rename_axis('x')
    .reset_index()
)

chart = go.Pie(labels=chart_data['x'], values=chart_data['__index__||count'])
figure = go.Figure(data=[chart], layout=go.Layout({
    'legend': {'orientation': 'h', 'y': -0.3},
    'title': {'text': 'Company Size Distribution'}
}))

iplot(figure)

In [83]:
# Pie chart of the data-science team-size answers. `df` is reloaded from the CSV
# below, so this cell does not depend on the frame that was passed to dtale.show.

import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)

# Survey question whose answer distribution is shown in the pie chart.
DS_WORKLOAD_QUESTION = 'Approximately how many individuals are responsible for data science workloads at your place of business?'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Respondents per answer, ordered by answer label. dropna=True leaves out rows
# where the answer is missing; every remaining group holds at least one row, so
# no further NaN or zero-count filtering is needed before plotting.
chart_data = (
    df.groupby(DS_WORKLOAD_QUESTION, dropna=True)
    .size()
    .rename('__index__||count')
    .rename_axis('x')
    .reset_index()
)

chart = go.Pie(labels=chart_data['x'], values=chart_data['__index__||count'])
figure = go.Figure(data=[chart], layout=go.Layout({
    'showlegend': True,
    'title': {'text': 'Individual Responsible for Data Science Workload at Workplace Distribution'}
}))

iplot(figure)

In [85]:
# Pie chart of employer ML-adoption answers. `df` is reloaded from the CSV below,
# so this cell does not depend on the frame that was passed to dtale.show.

import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)

# Survey question whose answer distribution is shown in the pie chart.
ML_USE_QUESTION = 'Does your current employer incorporate machine learning methods into their business?'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Respondents per answer, ordered by answer label. dropna=True leaves out rows
# where the answer is missing; every remaining group holds at least one row, so
# no further NaN or zero-count filtering is needed before plotting.
chart_data = (
    df.groupby(ML_USE_QUESTION, dropna=True)
    .size()
    .rename('__index__||count')
    .rename_axis('x')
    .reset_index()
)

# plotly.graph_objs is already imported as `go` at the top of this cell; the
# duplicate mid-cell import was removed.
chart = go.Pie(labels=chart_data['x'], values=chart_data['__index__||count'])
figure = go.Figure(data=[chart], layout=go.Layout({
    'showlegend': True,
    'title': {'text': 'Machine Learning Usage in Workplace Distribution'}
}))

iplot(figure)

In [87]:
# Pie chart of ML / cloud spending answers. `df` is reloaded from the CSV below,
# so this cell does not depend on the frame that was passed to dtale.show.

import pandas as pd
import plotly.graph_objs as go  # moved up from the middle of the cell
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)

# Survey question whose answer distribution is shown in the pie chart.
ML_CLOUD_SPEND_QUESTION = 'Approximately how much money have you spent on machine learning and/or cloud computing products at your work in the past 5 years?'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Respondents per answer, ordered by answer label. dropna=True leaves out rows
# where the answer is missing; every remaining group holds at least one row, so
# no further NaN or zero-count filtering is needed before plotting.
chart_data = (
    df.groupby(ML_CLOUD_SPEND_QUESTION, dropna=True)
    .size()
    .rename('__index__||count')
    .rename_axis('x')
    .reset_index()
)

chart = go.Pie(labels=chart_data['x'], values=chart_data['__index__||count'])
figure = go.Figure(data=[chart], layout=go.Layout({
    'showlegend': True,
    'title': {'text': 'Machine Learning/Cloud Computing Products Spent Distribution'}
}))

iplot(figure)

In [89]:
# Pie chart of the primary analysis-tool answers. `df` is reloaded from the CSV
# below, so this cell does not depend on the frame that was passed to dtale.show.

import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)

# Survey question whose answer distribution is shown in the pie chart.
PRIMARY_TOOL_QUESTION = 'What is the primary tool that you use at work or school to analyze data?'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Respondents per answer, ordered by answer label. dropna=True leaves out rows
# where the answer is missing; every remaining group holds at least one row, so
# no further NaN or zero-count filtering is needed before plotting.
chart_data = (
    df.groupby(PRIMARY_TOOL_QUESTION, dropna=True)
    .size()
    .rename('__index__||count')
    .rename_axis('x')
    .reset_index()
)

chart = go.Pie(labels=chart_data['x'], values=chart_data['__index__||count'])
figure = go.Figure(data=[chart], layout=go.Layout({
    'showlegend': True,
    'title': {'text': 'Data Analysis Tools Distribution'}
}))

iplot(figure)

In [92]:
# Pie chart of the coding-experience answers. `df` is reloaded from the CSV
# below, so this cell does not depend on the frame that was passed to dtale.show.

import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)

# Survey question whose answer distribution is shown in the pie chart.
CODING_YEARS_QUESTION = 'How long have you been writing code to analyze data (at work or at school)?'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Respondents per answer, ordered by answer label. dropna=True leaves out rows
# where the answer is missing; every remaining group holds at least one row, so
# no further NaN or zero-count filtering is needed before plotting.
chart_data = (
    df.groupby(CODING_YEARS_QUESTION, dropna=True)
    .size()
    .rename('__index__||count')
    .rename_axis('x')
    .reset_index()
)

chart = go.Pie(labels=chart_data['x'], values=chart_data['__index__||count'])
figure = go.Figure(data=[chart], layout=go.Layout({
    'showlegend': True,
    'title': {'text': 'Coding Experience Distribution'}
}))

iplot(figure)

In [103]:
# Pie chart of the recommended-first-language answers. `df` is reloaded from the
# CSV below, so this cell does not depend on the frame passed to dtale.show.

import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)

# Survey question whose answer distribution is shown in the pie chart.
FIRST_LANG_QUESTION = 'What programming language would you recommend an aspiring data scientist to learn first?'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Respondents per answer, ordered by answer label. dropna=True leaves out rows
# where the answer is missing; every remaining group holds at least one row, so
# no further NaN or zero-count filtering is needed before plotting.
chart_data = (
    df.groupby(FIRST_LANG_QUESTION, dropna=True)
    .size()
    .rename('__index__||count')
    .rename_axis('x')
    .reset_index()
)

chart = go.Pie(labels=chart_data['x'], values=chart_data['__index__||count'])
figure = go.Figure(data=[chart], layout=go.Layout({
    'legend': {'orientation': 'h', 'y': -0.3},
    'title': {'text': 'First Language Recommendation to Learn For Data Scientist Distribution'}
}))

iplot(figure)

In [98]:
# Pie chart of the TPU-usage answers. `df` is reloaded from the CSV below, so
# this cell does not depend on the frame that was passed to dtale.show.

import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)

# Survey question whose answer distribution is shown in the pie chart.
TPU_USE_QUESTION = 'Have you ever used a TPU (tensor processing unit)?'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Respondents per answer, ordered by answer label. dropna=True leaves out rows
# where the answer is missing; every remaining group holds at least one row, so
# no further NaN or zero-count filtering is needed before plotting.
chart_data = (
    df.groupby(TPU_USE_QUESTION, dropna=True)
    .size()
    .rename('__index__||count')
    .rename_axis('x')
    .reset_index()
)

# plotly.graph_objs is already imported as `go` at the top of this cell; the
# duplicate mid-cell import was removed.
chart = go.Pie(labels=chart_data['x'], values=chart_data['__index__||count'])
figure = go.Figure(data=[chart], layout=go.Layout({
    'legend': {'orientation': 'h', 'y': -0.3},
    'title': {'text': 'TPU (Tensor Processing Unit) Usage Experience Distribution'}
}))

iplot(figure)

In [100]:
# Pie chart of the ML-experience answers. `df` is reloaded from the CSV below,
# so this cell does not depend on the frame that was passed to dtale.show.

import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)

# Survey question whose answer distribution is shown in the pie chart.
ML_YEARS_QUESTION = 'For how many years have you used machine learning methods?'

# Reload the prepared survey so this cell runs on its own. The D-Tale export
# boilerplate that used to follow (Index-type check, index reset, column
# stringification) has no effect on the column charted here, and the Index-type
# branch could never run on the DataFrame returned by read_csv, so it is gone.
df = pd.read_csv('../../data/prep_survey_profile.csv')

# Respondents per answer, ordered by answer label. dropna=True leaves out rows
# where the answer is missing; every remaining group holds at least one row, so
# no further NaN or zero-count filtering is needed before plotting.
chart_data = (
    df.groupby(ML_YEARS_QUESTION, dropna=True)
    .size()
    .rename('__index__||count')
    .rename_axis('x')
    .reset_index()
)

chart = go.Pie(labels=chart_data['x'], values=chart_data['__index__||count'])
figure = go.Figure(data=[chart], layout=go.Layout({
    'showlegend': True,
    'title': {'text': 'Machine Learning Usage Experience Distribution'}
}))

iplot(figure)

## Bivariate Analysis