# Récupération et Nettoyage des données

## Récupération des données CPIAUCNS
Consumer Price Index for All Urban Consumers: All Items in U.S. City Average 

Source: Fed Saint Louis


In [38]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


url="https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1140&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=CPIAUCNS&scale=left&cosd=1913-01-01&coed=2024-09-01&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Monthly&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2024-10-27&revision_date=2024-10-27&nd=1913-01-01"

CPI_Urban = pd.read_csv(url, on_bad_lines='skip')

CPI_Urban.head(13)


Unnamed: 0,DATE,CPIAUCNS
0,1913-01-01,9.8
1,1913-02-01,9.8
2,1913-03-01,9.8
3,1913-04-01,9.8
4,1913-05-01,9.7
5,1913-06-01,9.8
6,1913-07-01,9.9
7,1913-08-01,9.9
8,1913-09-01,10.0
9,1913-10-01,10.0


Informations générales sur le jeu de données:

In [39]:
CPI_Urban.axes



[RangeIndex(start=0, stop=1341, step=1),
 Index(['DATE', 'CPIAUCNS'], dtype='object')]

In [40]:
CPI_Urban.dtypes

DATE         object
CPIAUCNS    float64
dtype: object

## Nettoyage des données en vue des étapes suivantes

Renommage des colonnes

In [41]:
CPI_Urban = CPI_Urban.rename(columns={'DATE':'date','CPIAUCNS': 'indice'})
CPI_Urban.axes

[RangeIndex(start=0, stop=1341, step=1),
 Index(['date', 'indice'], dtype='object')]

Traitement des dates

In [42]:

CPI_Urban['date'] = pd.to_datetime(CPI_Urban['date'])
CPI_Urban['year'] = CPI_Urban['date'].dt.year
CPI_Urban['month'] = CPI_Urban['date'].dt.month

CPI_Urban.head(10)



Unnamed: 0,date,indice,year,month
0,1913-01-01,9.8,1913,1
1,1913-02-01,9.8,1913,2
2,1913-03-01,9.8,1913,3
3,1913-04-01,9.8,1913,4
4,1913-05-01,9.7,1913,5
5,1913-06-01,9.8,1913,6
6,1913-07-01,9.9,1913,7
7,1913-08-01,9.9,1913,8
8,1913-09-01,10.0,1913,9
9,1913-10-01,10.0,1913,10


Création de la variable pourcentage d'inflation annuelle: 

Le jeu de données se présente en effet sous la forme d'indices avec une base 100 en 1983. 
Limite: pas idéal pour réaliser des stats sur une période antérieure. 

In [50]:
index_14=CPI_Urban.index[CPI_Urban['date'] == '1914-01-01'][0]
index_end=CPI_Urban.index[CPI_Urban['date'] == '1963-01-01'][0]

print(index_14,index_end)

#On ne calcule le taux d'inflation annuel que pour la période considérée
CPI_Urban['inf_an']=0

for i in range(index_14,index_end+1):
    b=CPI_Urban.loc[i,'indice']
    a=CPI_Urban.loc[i-12,'indice']
    CPI_Urban.at[i, 'inf_an']=((b-a)/a)*100

  
CPI_Urban.head(20)


12 600


  CPI_Urban.at[i, 'inf_an']=((b-a)/a)*100


Unnamed: 0,date,indice,year,month,inf_an
0,1913-01-01,9.8,1913,1,0.0
1,1913-02-01,9.8,1913,2,0.0
2,1913-03-01,9.8,1913,3,0.0
3,1913-04-01,9.8,1913,4,0.0
4,1913-05-01,9.7,1913,5,0.0
5,1913-06-01,9.8,1913,6,0.0
6,1913-07-01,9.9,1913,7,0.0
7,1913-08-01,9.9,1913,8,0.0
8,1913-09-01,10.0,1913,9,0.0
9,1913-10-01,10.0,1913,10,0.0


Création d'un sous-tableau de la période considérée

In [55]:
CPI_sub = CPI_Urban[(CPI_Urban['year'] > 1913) & (CPI_Urban['year'] < 1963)]
CPI_sub.head(10)
CPI_sub.tail(10)


Unnamed: 0,date,indice,year,month,inf_an
590,1962-03-01,30.1,1962,3,1.006711
591,1962-04-01,30.2,1962,4,1.342282
592,1962-05-01,30.2,1962,5,1.342282
593,1962-06-01,30.2,1962,6,1.342282
594,1962-07-01,30.3,1962,7,1.0
595,1962-08-01,30.3,1962,8,1.337793
596,1962-09-01,30.4,1962,9,1.333333
597,1962-10-01,30.4,1962,10,1.333333
598,1962-11-01,30.4,1962,11,1.333333
599,1962-12-01,30.4,1962,12,1.333333


## Identification des périodes d'intérêt

Identification des périodes d'inflation négative

Création d'une indicatrice inf_neg qui prend la valeur 1 si l'inflation annuelle est négative

In [61]:
CPI_sub['inf_neg']=0
CPI_sub.loc[CPI_sub['inf_an'] < 0, 'inf_neg'] = 1
CPI_sub.head(20)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  CPI_sub['inf_neg']=0


Unnamed: 0,date,indice,year,month,inf_an,inf_neg
12,1914-01-01,10.0,1914,1,2.040816,0
13,1914-02-01,9.9,1914,2,1.020408,0
14,1914-03-01,9.9,1914,3,1.020408,0
15,1914-04-01,9.8,1914,4,0.0,0
16,1914-05-01,9.9,1914,5,2.061856,0
17,1914-06-01,9.9,1914,6,1.020408,0
18,1914-07-01,10.0,1914,7,1.010101,0
19,1914-08-01,10.2,1914,8,3.030303,0
20,1914-09-01,10.2,1914,9,2.0,0
21,1914-10-01,10.1,1914,10,1.0,0
