In [None]:
import numpy as np
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Load the 2020 article export (semicolon-separated, Latin-1 encoded) and
# prepare it in a single chain:
#   - parse `date` into datetimes,
#   - order rows from most to least viewed,
#   - replace every missing value with the placeholder 'Divers',
#   - add a constant `count` column (1 per row) so sums give row counts.
articles_2020_csv = os.path.join('..', 'mundus', 'data', 'mundus_article_2020.csv')
df = (
    pd.read_csv(articles_2020_csv, encoding='ISO-8859-1', sep=';')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('views', ascending=False)
    .fillna('Divers')
    .assign(count=1)
)
df.head()

In [None]:
# Number of rows in the 2020 frame.
df.shape[0]

# Jeu

In [None]:
# Number of distinct values in the `game` column (missing values, if any,
# count as one value, exactly like len(unique())).
df['game'].nunique(dropna=False)

In [None]:
# Article count and total views per game family.
# Only the two columns that are plotted are summed: summing the whole frame
# also tries to add up the datetime `date` column and the text columns, which
# raises a TypeError on recent pandas (2.x, where `numeric_only` defaults to
# False for groupby sums).
df2 = df.groupby('game_type')[['count', 'views']].sum()

# Two side-by-side pie charts sharing the same labels:
# left = number of articles, right = number of views.
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=df2.index, values=df2['count'], textinfo='value+percent'), 1, 1)
fig.add_trace(go.Pie(labels=df2.index, values=df2['views'], textinfo='value+percent'), 1, 2)

# hole=.4 turns both pies into donuts, leaving room for the annotations below.
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

# separators=', .' -> comma as decimal mark, space as thousands separator.
fig.update_layout(
    title_text='Statistiques 2020 par famille de jeu', separators = ', .',
    annotations=[dict(text='Articles', x=0.18, y=0.5, font_size=20, showarrow=False),
                 dict(text='Vues', x=0.81, y=0.5, font_size=20, showarrow=False)])
fig.show()

In [None]:
# Games that get their own slice; every other game is pooled under
# 'Autres jeux'.
featured_games = ['Crusader Kings 3', 'Europa Universalis IV', 'Stellaris',
                  'Medieval II : Total War', 'Hearts of Iron IV', 'Total War : Warhammer II']

# Vectorised equivalent of "default label, then overwrite per game":
# keep the game name where it is featured, otherwise use the pooled label.
df['game2'] = df['game'].where(df['game'].isin(featured_games), 'Autres jeux')

# Only the two plotted columns are summed: summing the whole frame also tries
# to add up the datetime `date` column and the text columns, which raises a
# TypeError on recent pandas (2.x, where `numeric_only` defaults to False for
# groupby sums).
df2 = df.groupby('game2')[['count', 'views']].sum()

# Two side-by-side pie charts sharing the same labels:
# left = number of articles, right = number of views.
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=df2.index, values=df2['count'], textinfo='value+percent'), 1, 1)
fig.add_trace(go.Pie(labels=df2.index, values=df2['views'], textinfo='value+percent'), 1, 2)

# hole=.4 turns both pies into donuts, leaving room for the annotations below.
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

# separators=', .' -> comma as decimal mark, space as thousands separator.
fig.update_layout(
    title_text='Statistiques 2020 des jeux à 10 articles ou plus', separators = ', .',
    annotations=[dict(text='Articles', x=0.18, y=0.5, font_size=20, showarrow=False),
                 dict(text='Vues', x=0.81, y=0.5, font_size=20, showarrow=False)])
fig.show()

# Views by game

In [None]:
# Box plot of article views, one box per game, restricted to the
# 'Grande Stratégie' family.
is_grand_strategy = df['game_type'] == 'Grande Stratégie'
fig = px.box(
    df.loc[is_grand_strategy],
    x='game',
    y='views',
    title='Vues des articles 2020 de Grande Stratégie',
)
fig.show()

In [None]:
# Box plot of article views, one box per game, restricted to the
# 'Total War' family.
is_total_war = df['game_type'] == 'Total War'
fig = px.box(
    df.loc[is_total_war],
    x='game',
    y='views',
    title='Vues des articles 2020 de Total War',
)
fig.show()

In [None]:
# Box plot of article views, one box per game, restricted to the
# 'Gestion' family.
is_management = df['game_type'] == 'Gestion'
fig = px.box(
    df.loc[is_management],
    x='game',
    y='views',
    title='Vues des articles 2020 de Gestion',
)
fig.show()

# Rédacteurs

In [None]:
# Number of distinct values in the `author` column (missing values, if any,
# count as one value, exactly like len(unique())).
df['author'].nunique(dropna=False)

In [None]:
# Article count and total views per author.
# Only the two columns that are plotted are summed: summing the whole frame
# also tries to add up the datetime `date` column and the text columns, which
# raises a TypeError on recent pandas (2.x, where `numeric_only` defaults to
# False for groupby sums).
df3 = df.groupby('author')[['count', 'views']].sum()

# Two side-by-side pie charts sharing the same labels:
# left = number of articles, right = number of views.
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=df3.index, values=df3['count'], textinfo='value+percent'), 1, 1)
fig.add_trace(go.Pie(labels=df3.index, values=df3['views'], textinfo='value+percent'), 1, 2)

# hole=.4 turns both pies into donuts, leaving room for the annotations below.
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

# separators=', .' -> comma as decimal mark, space as thousands separator.
fig.update_layout(
    title_text='Statistiques 2020 par rédacteur', separators = ', .',
    annotations=[dict(text='Articles', x=0.18, y=0.5, font_size=20, showarrow=False),
                 dict(text='Vues', x=0.81, y=0.5, font_size=20, showarrow=False)])
fig.show()

In [None]:
# Box plot of article views, one box per author, over the whole 2020 frame.
fig = px.box(
    df,
    x='author',
    y='views',
    title='Vues des articles 2020 par rédacteur',
)
fig.show()

# Jour

In [None]:
# Ordinal day of the year (1-based) of each article's date.
df['day_of_year'] = df['date'].dt.dayofyear

# Articles per day, for the days that have at least one article.
df2 = df.groupby('day_of_year')['count'].count()

# Build a full 2020 calendar so that days without any article show up as 0.
# Day numbers are 1-based and 2020 is a leap year, so they run from 1 to 366;
# a 0..364 range would add a phantom "day 0" and drop 30 and 31 December.
days_of_2020 = np.arange(1, pd.Timestamp(2020, 12, 31).dayofyear + 1)

# Column 0 holds the day number (code that looks up `df2[0]` relies on it) and
# the index is that same day number, so the join aligns counts on day_of_year.
df2 = pd.DataFrame(days_of_2020, index=days_of_2020).join(df2).fillna(0)

# Distribution of the daily article count: x = articles in a day,
# y = number of days with that many articles.
fig = px.histogram(df2, x='count')
fig.update_xaxes(title_text='Nombre d\'articles dans la journée')
# The y axis (not the x axis a second time) carries the number of days.
fig.update_yaxes(title_text='Nombre de jours')
fig.update_layout(title=dict(text='Histogramme du nombre d\'articles par jours'))

In [None]:
# Articles published on the busiest day of the year.
# `df2` has one row per day: column 0 is the day number, `count` the number of
# articles. When several days tie for the maximum, the first one is used.
peak_days = df2.loc[df2['count'] == df2['count'].max()]
busiest_day = peak_days[0].iloc[0]
df.loc[df['day_of_year'] == busiest_day]

# 2017-2020

In [None]:
# Load the 2017-2020 article export (Latin-1 encoded) and prepare it in a
# single chain:
#   - parse `date` into datetimes,
#   - order rows from most to least viewed,
#   - replace every missing value with the placeholder 'Divers',
#   - add a constant `count` column (1 per row) so sums give row counts.
# NOTE(review): unlike the 2020 load, no `sep=';'` is passed here, so the
# default comma separator is used -- confirm this file really is
# comma-separated.
articles_all_csv = os.path.join('..', 'mundus', 'data', 'mundus_article_2017_2020.csv')
all_df = (
    pd.read_csv(articles_all_csv, encoding='ISO-8859-1')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('views', ascending=False)
    .fillna('Divers')
    .assign(count=1)
)

# The 50 most viewed articles, reduced to the identifying columns.
all_df[['author', 'date', 'title', 'views']].head(50)