In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from plotly import graph_objs as go
from plotly.subplots import make_subplots
import plotly.express as px

In [4]:
# Read the CSV into `df` (a comma is read_csv's default separator, so it is
# not passed explicitly) and preview the first rows.
df = pd.read_csv('ZonAnn.Ts+dSST.csv')
df.head()

Unnamed: 0,Year,Glob,NHem,SHem,24N-90N,24S-24N,90S-24S,64N-90N,44N-64N,24N-44N,EQU-24N,24S-EQU,44S-24S,64S-44S,90S-64S
0,1880,-0.16,-0.28,-0.04,-0.38,-0.11,-0.02,-0.81,-0.47,-0.29,-0.14,-0.09,-0.04,0.05,0.68
1,1881,-0.08,-0.18,0.01,-0.36,0.12,-0.07,-0.94,-0.47,-0.2,0.11,0.12,-0.05,-0.07,0.6
2,1882,-0.11,-0.22,-0.01,-0.32,-0.04,0.01,-1.42,-0.28,-0.16,-0.04,-0.03,0.01,0.04,0.63
3,1883,-0.17,-0.28,-0.06,-0.34,-0.15,-0.01,-0.17,-0.56,-0.26,-0.16,-0.14,-0.04,0.07,0.5
4,1884,-0.28,-0.42,-0.14,-0.6,-0.13,-0.14,-1.29,-0.64,-0.46,-0.12,-0.15,-0.19,-0.02,0.65


In [5]:
# Inspect the available information to check the data types and any missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 15 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Year     144 non-null    int64  
 1   Glob     144 non-null    float64
 2   NHem     144 non-null    float64
 3   SHem     144 non-null    float64
 4   24N-90N  144 non-null    float64
 5   24S-24N  144 non-null    float64
 6   90S-24S  144 non-null    float64
 7   64N-90N  144 non-null    float64
 8   44N-64N  144 non-null    float64
 9   24N-44N  144 non-null    float64
 10  EQU-24N  144 non-null    float64
 11  24S-EQU  144 non-null    float64
 12  44S-24S  144 non-null    float64
 13  64S-44S  144 non-null    float64
 14  90S-64S  144 non-null    float64
dtypes: float64(14), int64(1)
memory usage: 17.0 KB


In [6]:
# The dataset looks very clean, but to be safe we still check for missing values:
df.isna().sum()

Year       0
Glob       0
NHem       0
SHem       0
24N-90N    0
24S-24N    0
90S-24S    0
64N-90N    0
44N-64N    0
24N-44N    0
EQU-24N    0
24S-EQU    0
44S-24S    0
64S-44S    0
90S-64S    0
dtype: int64

In [7]:
# Ignore the Year column, which has no statistical interest, and examine the
# distribution of the remaining variables.
# The column is dropped by name rather than by position so the summary does not
# silently change if the column order of `df` ever differs.
description = df.drop(columns="Year").describe()
display(description)

Unnamed: 0,Glob,NHem,SHem,24N-90N,24S-24N,90S-24S,64N-90N,44N-64N,24N-44N,EQU-24N,24S-EQU,44S-24S,64S-44S,90S-64S
count,144.0,144.0,144.0,144.0,144.0,144.0,144.0,144.0,144.0,144.0,144.0,144.0,144.0,144.0
mean,0.069514,0.100139,0.039792,0.124583,0.079306,0.002292,0.275208,0.146181,0.049653,0.0675,0.090208,0.047708,-0.053264,-0.073194
std,0.377371,0.455901,0.316516,0.55,0.349656,0.314571,1.011851,0.601926,0.43451,0.353724,0.352667,0.334835,0.276352,0.762091
min,-0.48,-0.57,-0.47,-0.66,-0.62,-0.48,-1.75,-0.78,-0.62,-0.68,-0.57,-0.43,-0.55,-2.6
25%,-0.2,-0.22,-0.21,-0.29,-0.19,-0.24,-0.3625,-0.285,-0.24,-0.2,-0.17,-0.22,-0.26,-0.53
50%,-0.045,0.0,-0.055,0.025,0.015,-0.08,0.095,0.0,-0.06,0.01,0.01,-0.06,-0.08,0.045
75%,0.28,0.2525,0.2725,0.3725,0.31,0.2625,0.7625,0.4475,0.165,0.28,0.32,0.29,0.19,0.45
max,1.17,1.49,0.85,1.78,1.05,0.71,3.27,1.87,1.47,1.07,1.06,0.91,0.44,1.35
