<a href="https://colab.research.google.com/github/JordanHolland94/Colab/blob/main/One_Chart_a_Day.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# One Chart a Day

## Libraries

In [1]:
# from google.colab import drive
# drive.mount('/content/drive/')
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
!pip install --upgrade plotly



## Helper Functions

In [2]:
def gen_legend_map(df, column_name, column_label, labels):
    '''
    Used to generate a legend map for relabeling Plotly chart legends.

    Builds one key per unique value of df[column_name], in order of first
    appearance, formatted as '<column_label>=<value>', and pairs it with the
    entry of labels at the same position.

    Parameters:
        df: DataFrame containing the column to build keys from.
        column_name: Name of the column whose unique values form the keys.
        column_label: Text placed before the '=' in every key.
        labels: Replacement names, in the same order as the unique values.

    Returns:
        dict mapping '<column_label>=<value>' to the matching label. Pairing
        stops at the shorter of the two sequences (zip semantics).
    '''
    unique_values = df[column_name].unique().tolist()
    # str() keeps string values unchanged and lets non-string columns
    # (e.g. integer years) build keys instead of raising TypeError.
    trace_names = [column_label + '=' + str(value) for value in unique_values]

    return dict(zip(trace_names, labels))

## 12/16/2021

In [3]:
# Tip amount against total bill, coloured by sex, with a LOWESS trendline
df = px.data.tips()

fig = px.scatter(
    df,
    x='total_bill',
    y='tip',
    color='sex',
    trendline='lowess',
    title='Tip by Total Bill',
    labels=dict(total_bill='Total Bill', tip='Tip', sex='Sex'),
)

# Commenting out this part because it looks like Plotly's default behavior changed and it's no longer needed
# legend_map = {'sex=Female': 'Female', 'sex=Male': 'Male'}
# fig.for_each_trace(lambda t: t.update(name = legend_map[t.name]))  # Renames legend entries

fig.show()

  import pandas.util.testing as tm


## 12/17/2021

Things to solve
*   Update legend ✅
*   Fix axes (appropriate ranges) ✅
*   Remove double 0 origin point ✅
*   Update labels ✅ - made a helper function
*   Change year=1952 to just the year

In [4]:
# Average the three metrics per continent and year, giving one row per
# (continent, year) pair for the animated scatter below
df = (
    px.data.gapminder()
    .groupby(['continent', 'year'])[['lifeExp', 'pop', 'gdpPercap']]
    .mean()
    .reset_index()
)

fig = px.scatter(
    df,
    x='lifeExp',
    y='gdpPercap',
    size='pop',
    color='continent',
    animation_frame='year',
    animation_group='continent',
    title='Life Expectancy and GDP Over Time by Continent',
    # Specifying all of the labels helps clean up the hover tooltip.
    # Renaming the columns ahead of time is another solution.
    labels={
        'gdpPercap': 'GDP per Capita',
        'lifeExp': 'Life Expectancy',
        'pop': 'Population',
        'continent': 'Continent',
        'year': 'Year',
    },
)

# Both axes start at 0 with some headroom above the largest value.
# Manually setting the tickvals prevents 0 from showing up on both the x and y axes' labels.
fig.update_xaxes(
    range=[0, max(df['lifeExp']) + 10],
    tickvals=[*range(10, 91, 10)],
)
fig.update_yaxes(range=[0, max(df['gdpPercap']) + 5000])

# Commenting out this part because it looks like Plotly's default behavior changed and it's no longer needed
# legend_map = gen_legend_map(df, 'continent', 'Continent', ['Africa', 'Americas', 'Asia', 'Europe', 'Oceania'])
# fig.for_each_trace(lambda t: t.update(name = legend_map[t.name]))  # Renames legend entries

fig.show()

## 12/18/2021

Need to make this USA map specific and get animation frames to work properly.

In [5]:
# Load the electoral votes CSV hosted in the GitHub repo
df = pd.read_csv('https://raw.githubusercontent.com/JordanHolland94/Colab/main/Data/electoral_votes.csv')

# Replace missing vote counts with 0 by assigning the filled column back.
# Calling fillna(inplace=True) on the df['Votes'] selection is chained
# assignment: pandas warns about it, and under Copy-on-Write it leaves df
# unchanged.
df['Votes'] = df['Votes'].fillna(0)

In [6]:
# Bubble map of votes per state, animated by year.
# scope='usa' restricts the map to the United States instead of the default
# world view.
# NOTE(review): locationmode='USA-states' presumably needs the 'State' column
# to hold two-letter state abbreviations - confirm against the CSV.
px.scatter_geo(data_frame = df,
               locationmode = 'USA-states',
               locations = 'State',
               color = 'State',
               size = 'Votes',
               scope = 'usa',
               animation_frame = 'Year',
               animation_group = 'State')

## 12/19/2021

In [7]:
# Code used to grab 2020 NFL standings 
# Saved in case link/process breaks
# division_mapping = {
#     # AFC
#     'Baltimore Ravens': 'AFC North',
#     'Buffalo Bills': 'AFC East',
#     'Cincinnati Bengals': 'AFC North',
#     'Cleveland Browns': 'AFC North',
#     'Denver Broncos': 'AFC West',
#     'Houston Texans': 'AFC South',
#     'Indianapolis Colts': 'AFC South',
#     'Jacksonville Jaguars': 'AFC South',
#     'Kansas City Chiefs': 'AFC West',
#     'Las Vegas Raiders': 'AFC West',
#     'Los Angeles Chargers': 'AFC West',
#     'Miami Dolphins': 'AFC East',
#     'New England Patriots': 'AFC East',
#     'New York Jets': 'AFC East',
#     'Pittsburgh Steelers': 'AFC North',
#     'Tennessee Titans': 'AFC South',
#     # NFC
#     'Arizona Cardinals': 'NFC West',
#     'Atlanta Falcons': 'NFC South',
#     'Carolina Panthers': 'NFC South',
#     'Chicago Bears': 'NFC North',
#     'Dallas Cowboys': 'NFC East',
#     'Detroit Lions': 'NFC North',
#     'Green Bay Packers': 'NFC North',
#     'Los Angeles Rams': 'NFC West',
#     'Minnesota Vikings': 'NFC North',
#     'New Orleans Saints': 'NFC South',
#     'New York Giants': 'NFC East',
#     'Philadelphia Eagles': 'NFC East',
#     'San Francisco 49ers': 'NFC West',
#     'Seattle Seahawks': 'NFC West',
#     'Tampa Bay Buccaneers': 'NFC South',
#     'Washington Football Team': 'NFC East'
# }

# def clean_standings(standings_url, table_number, conference):
#     df = pd.read_html(standings_url)[table_number]
#     df = df[df.columns[:8]]
#     df.columns = ['Team', 'Wins', 'Losses', 'Ties', 'WinLossPerc', 'PointsFor', 'PointsAgainst', 'PointDifferential']
#     df = df[~df.index.isin(list(range(0, len(df), 5)))].reset_index(drop = True)
#     df.loc[:, 'Team'] = df['Team'].str.replace('[^a-zA-Z0-9 ]', '', regex = True)  # Regex portion - ^ means DON'T match a-z, A-Z, 0-9, or spaces. Everything else gets returned and replaced.
#     df['Division'] = df['Team'].map(division_mapping)
#     df['Conference'] = conference

#     return df

# afc_standings = clean_standings('https://www.pro-football-reference.com/years/2020/', 0, 'AFC')
# nfc_standings = clean_standings('https://www.pro-football-reference.com/years/2020/', 1, 'NFC')

# nfl_standings = afc_standings.append(nfc_standings).reset_index(drop = True)

# for column in ['Wins', 'Losses', 'Ties', 'PointsFor', 'PointsAgainst', 'PointDifferential']:
#     nfl_standings.loc[:, column] = nfl_standings[column].astype('int64')

# nfl_standings.to_csv('/content/drive/MyDrive/Colab Notebooks/One Chart a Day/Data/nfl_standings_2020.csv')

# Read the saved 2020 standings CSV from the GitHub repo
nfl_standings = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/JordanHolland94/Colab/main/Data/nfl_standings_2020.csv'
)

In [8]:
# Sunburst drilling down Conference -> Division -> Team, coloured by wins
fig = px.sunburst(
    nfl_standings,
    path=['Conference', 'Division', 'Team'],
    color='Wins',
    color_continuous_scale='RdBu',
    title='NFL Wins - 2020',
    height=1080,
)

fig.show()

## 12/20/2021

In [9]:
# Treemap of Wheel of Time book lengths
# Data gathered from Wikipedia
# df = pd.read_html('https://en.wikipedia.org/wiki/The_Wheel_of_Time')[1][['Title', 'Length']][0:15]
# df['Length'] = df['Length'].str.slice(start = 23, stop = 30).str.replace(',', '').astype('int64')
# df['Series'] = 'Wheel of Time'

# df.to_csv('/content/drive/MyDrive/Colab Notebooks/One Chart a Day/Data/wot_lengths.csv')

# Read the saved Wheel of Time book lengths CSV from the GitHub repo
df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/JordanHolland94/Colab/main/Data/wot_lengths.csv'
)

In [10]:
# One tile per book title under its series; tile size and colour both come
# from the 'Length' column
fig = px.treemap(
    df,
    names='Title',
    parents='Series',
    values='Length',
    color='Length',
    color_continuous_scale='haline',
    title='Wheel of Time Book Lengths',
)

fig.show()

## 12/21/2021

Basic chart, but add a toggle.
Using Colab's tools is not as interactive as I would like, so figuring out Plotly's method might be better.

In [11]:
# Load the iris sample dataset that ships with Plotly Express
df = px.data.iris()

In [12]:
# Colab form field: the selected species decides which rows get plotted
Dropdown = 'versicolor' #@param ['setosa', 'versicolor', 'virginica']

# Keep only the rows belonging to the chosen species
fig = px.scatter(
    df[df['species'] == Dropdown],
    x='sepal_width',
    y='sepal_length',
    color='species',
    labels=dict(
        species='Species',
        sepal_width='Sepal Width',
        sepal_length='Sepal Length',
    ),
)

fig.show()

## 12/22/2021
Three Houses base stats, facet by men/women

In [13]:
# Data originally pulled from here: https://serenesforest.net/three-houses/characters/base-stats/
df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/JordanHolland94/Colab/main/Data/golden_deer_stats.csv'
)

# Z score normalization of every column after the first one
for stat_name in df.columns[1:]:
    df[stat_name] = df[stat_name].pipe(lambda values: (values - values.mean()) / values.std())

# Reshape to long form (one row per Name/Stat pair) and order the rows by
# Name, then Stat
df = (
    df
    .melt(id_vars='Name', var_name='Stat', value_name='Base Value')
    .sort_values(['Name', 'Stat'])
)

golden_deer_women = ['Hilda', 'Leonie', 'Lysithea', 'Marianne']
golden_deer_men = ['Claude', 'Ignatz', 'Lorenz', 'Raphael']

In [14]:
# Two side-by-side polar panels: men on the left, women on the right
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[dict(type='scatterpolar'), dict(type='scatterpolar')]],
    subplot_titles=['Men', 'Women'],
)

# Alphabetical stat names, matching the Name/Stat ordering of df's rows
categories = df['Stat'].unique().tolist()
categories.sort()

# The panel number doubles as the column index and, as a string, as the
# legend group ('1' for men, '2' for women)
for panel, roster in enumerate((golden_deer_men, golden_deer_women), start=1):
    for character in roster:
        radar_trace = go.Scatterpolar(
            r=df.loc[df['Name'] == character, 'Base Value'],
            theta=categories,
            name=character,
            legendgroup=str(panel),
            fill='toself',
            opacity=.4,
        )
        fig.add_trace(radar_trace, row=1, col=panel)

fig.update_layout(title='Fire Emblem Three Houses - Base Stats - Golden Deer')

fig.show()

## 12/23/2021
NFL Parity

In [15]:
# Code used to generate the original file
# seasons = list(range(1970, 2022))
# base_url = 'https://www.pro-football-reference.com/years/'

# def clean_standings(standings_url, table_number, season):
#     df = pd.read_html(standings_url + str(season))[table_number]
#     df = df[['Tm', 'W-L%']]
#     df.columns = ['Team', 'WinLossPerc']
#     df = df.loc[~df['WinLossPerc'].str.replace(' ', '').str.isalpha(), :]  # Remove rows with division labels
#     df.loc[:, 'Team'] = df['Team'].str.replace('[^a-zA-Z0-9 ]', '', regex = True)  # Regex portion - ^ means DON'T match a-z, A-Z, 0-9, or spaces. Everything else gets returned and replaced.
#     df['Season'] = season

#     return df

# nfl_standings = pd.DataFrame()
# for season in seasons:
#     afc_standings = clean_standings(base_url, 0, season)
#     nfc_standings = clean_standings(base_url, 1, season)

#     nfl_standings = nfl_standings.append(afc_standings.append(nfc_standings))


# nfl_standings['WinLossPerc'] = nfl_standings['WinLossPerc'].astype('float64')

# Read the saved multi-season standings CSV from the GitHub repo
nfl_standings = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/JordanHolland94/Colab/main/Data/nfl_parity.csv'
)

In [16]:
# Standard deviation of winning percentage within each season
px_df = nfl_standings.groupby('Season')['WinLossPerc'].std().reset_index()

# Z score normalize the per-season standard deviations
px_df['WinLossPerc'] = px_df['WinLossPerc'].pipe(
    lambda spread: (spread - spread.mean()) / spread.std()
)

fig = px.scatter(
    px_df,
    x='Season',
    y='WinLossPerc',
    title='NFL Parity - Standard Deviation of Winning Percentage<br><sup>Z Score Normalized</sup>',
    labels={'WinLossPerc': 'Winning Percentage'},
    template='simple_white',
)
# Recolour the markers as a separate step instead of chaining onto the figure
fig.update_traces(marker={'color': 'MediumSeaGreen'})

fig.show()