In [1]:
import pandas as pd

# Mission 1 : Explorer les Données (Débutant)

In [2]:
# 1 - Charger les données et afficher les 5 premières lignes.
df = pd.read_csv('game_data_complete.csv', sep=',')
df.head(100)

Unnamed: 0,Date,ID_Joueur,Temps_Jeu,Victoires,Achats_Objets,Satisfaction,Région,Serveur,Latitude,Longitude
0,2023-01-07,1943,100.913289,1,0,71.3,OC,Stable,30.432320,96.947262
1,2023-01-20,1943,103.048186,1,50,75.9,EU,Stable,5.541137,-89.395196
2,2023-01-29,1943,176.132234,1,50,86.4,AS,Stable,-54.947640,-37.501899
3,2023-01-15,1943,94.473477,0,0,94.6,AF,Stable,-66.108038,-162.469859
4,2023-01-11,1943,193.236652,3,50,55.9,AF,Stable,-34.836281,-134.227974
...,...,...,...,...,...,...,...,...,...,...
95,2023-01-29,1182,134.418258,0,0,67.7,AS,Dégradé,82.900285,76.750334
96,2023-01-18,1182,163.396899,1,50,91.9,EU,Stable,60.789729,-172.086268
97,2023-01-26,1182,106.204222,3,0,71.3,EU,Stable,-74.227976,156.736426
98,2023-01-12,1182,107.110951,2,50,92.6,SA,Stable,-72.560563,85.238602


In [3]:
# 2 - Identifier les colonnes avec des valeurs manquantes et leur proportion.
df.info()

df = df.drop_duplicates()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           15000 non-null  object 
 1   ID_Joueur      15000 non-null  int64  
 2   Temps_Jeu      14250 non-null  float64
 3   Victoires      15000 non-null  int64  
 4   Achats_Objets  15000 non-null  int64  
 5   Satisfaction   14250 non-null  float64
 6   Région         11826 non-null  object 
 7   Serveur        15000 non-null  object 
 8   Latitude       15000 non-null  float64
 9   Longitude      15000 non-null  float64
dtypes: float64(4), int64(3), object(3)
memory usage: 1.1+ MB


In [4]:
# 3 - Compter le nombre unique de joueurs.
df['ID_Joueur'].nunique()

500

In [5]:
# 4 - Vérifiez les statistiques globales (moyennes, valeurs minimales et maximales).
df.describe()

Unnamed: 0,ID_Joueur,Temps_Jeu,Victoires,Achats_Objets,Satisfaction,Latitude,Longitude
count,15000.0,14250.0,15000.0,15000.0,14250.0,15000.0,15000.0
mean,1494.598,120.235668,1.139267,54.273333,75.008926,0.142723,-0.623209
std,291.007357,30.038566,1.059975,112.281992,14.321214,52.012019,103.976793
min,1001.0,30.0,0.0,0.0,50.0,-89.958798,-179.912479
25%,1234.75,99.957081,0.0,0.0,62.8,-45.112024,-91.142225
50%,1501.5,120.196449,1.0,0.0,75.0,0.531799,-0.668055
75%,1741.75,140.641696,2.0,50.0,87.5,44.547058,89.220585
max,1999.0,254.372528,4.0,500.0,100.0,89.985052,179.970215


In [6]:
# 5 - Trouvez les jours où les serveurs sont hors ligne par région.
date_status_serveur_par_region = df[['Date', 'Serveur', 'Région']]

jours_hors_ligne = date_status_serveur_par_region[date_status_serveur_par_region['Serveur'] == 'Hors-Ligne']

jours_hors_ligne

Unnamed: 0,Date,Serveur,Région
14,2023-01-24,Hors-Ligne,AS
30,2023-01-01,Hors-Ligne,SA
51,2023-01-23,Hors-Ligne,OC
87,2023-01-04,Hors-Ligne,AF
101,2023-01-30,Hors-Ligne,OC
...,...,...,...
14937,2023-01-23,Hors-Ligne,SA
14938,2023-01-19,Hors-Ligne,SA
14947,2023-01-24,Hors-Ligne,OC
14949,2023-01-03,Hors-Ligne,AF


In [8]:
import plotly.graph_objects as go

# 6 - Filtrez les données pour ces jours et analysez leur impact sur les achats.
data_for_analysis = df[['Date', 'Achats_Objets', 'Serveur', 'Région', 'Satisfaction']]

data_for_analysis['Date'] = pd.to_datetime(data_for_analysis['Date']).dt.day

regions = data_for_analysis['Région'].unique()

colors = ['aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure','beige', 'bisque', 'black', 'blanchedalmond', 'blue','blueviolet', 'brown', 'burlywood', 'cadetblue']

regions_color = {region: colors[i] for i, region in enumerate(regions)}

achats_per_date_region = data_for_analysis.groupby(['Date', 'Région'])['Achats_Objets'].sum().reset_index()
satisfaction_per_date_region = data_for_analysis.groupby(['Date', 'Région'])['Satisfaction'].mean().reset_index()

data_per_region = []
for region in regions:
    achats_grouped_data = achats_per_date_region[achats_per_date_region['Région'] == region]
    satisfaction_group_data = satisfaction_per_date_region[satisfaction_per_date_region['Région'] == region]
    if not achats_grouped_data.empty and not satisfaction_group_data.empty:
        region_data = {
            'region': region,
            'color': regions_color[region],
            'achats_objets_per_day': achats_grouped_data,
            'satisfaction_per_day': satisfaction_group_data
        }
        data_per_region.append(region_data)

fig = go.Figure()
for region_data in data_per_region:

    region = region_data['region']
    color = region_data['color']
    dates_per_region = region_data['achats_objets_per_day']['Date']
    sum_achats_objets = region_data['achats_objets_per_day']['Achats_Objets']

    fig.add_trace(
        go.Scatter(
            x=dates_per_region,
            y=sum_achats_objets,
            name='Achats Objets ' + str(region),
            mode='lines+markers',
            line_color=color,
        )
    )

fig.update_layout(
        title=dict(
            text='Achats d\'objets par région chaques jours du mois de Janvier'
        ),
        xaxis_title='Date',
        yaxis_title='Achats Objets',
        showlegend=True,
)

fig.show()

fig = go.Figure()

for region_data in data_per_region:

        region = region_data['region']
        color = region_data['color']
        dates_per_region = region_data['satisfaction_per_day']['Date']
        mean_satisfaction = region_data['satisfaction_per_day']['Satisfaction']

        fig.add_trace(
            go.Scatter(
                x=dates_per_region,
                y=mean_satisfaction,
                name='Satisfaction ' + str(region),
                mode='lines+markers',
                line_color=color,
            )
        )

fig.update_layout(
            title=dict(
                text='Satisfaction par région chaques jours du mois de Janvier'
            ),
            xaxis_title='Date',
            yaxis_title='Satisfaction',
            showlegend=True,
        )

fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



# Mission 2 : Analyse des Joueurs Actifs (Intermédiaire)

In [None]:
# 1 - Calculer le temps total de jeu par joueur.

df = df.groupby('ID_Joueur')['Temps_jeu'].transform('sum')

In [None]:
# 2 - Afficher les 5 joueurs les plus actifs.

In [None]:
# 3 - Identifier les joueurs ayant une satisfaction inférieure à 60.