# Projet Fil Rouge
### Garnier, Thibaud, 2025

In [1]:
# Package importation
import os
import requests
import zipfile
import pandas as pd
import numpy as np
import matplotlib_inline as mlp
import seaborn as sns
import requests

import DataLoader


## Data Cleaning

### Data Import

In [None]:
# --------------------------------------------------
# 1. Download the ZIP file
# --------------------------------------------------
# Archive served by FranceAgriMer's VisioNet site (per the file name:
# collection / stock history since 2000).
url = "https://visionet.franceagrimer.fr/Pages/OpenDocument.aspx?fileurl=SeriesChronologiques%2fproductions%20vegetales%2fgrandes%20cultures%2fcollecte%2cstocks%2cd%c3%a9p%c3%b4ts%2fSCR-GRC-histDEP_collecte_stock_depuis_2000-C25.zip&telechargersanscomptage=oui"


zip_path = "data.zip"
# requests has no default timeout: without one, an unresponsive server would
# block "Run All" forever. The context manager releases the streamed
# connection even if the download fails half-way.
with requests.get(url, stream=True, timeout=60) as response:
    response.raise_for_status()                     # ensure the download succeeded

    # Stream to disk chunk by chunk so the archive is never fully held in memory
    with open(zip_path, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

print("ZIP downloaded")
# --------------------------------------------------
# 2. Extract the ZIP archive
# --------------------------------------------------

with zipfile.ZipFile(zip_path, "r") as z:
    # Ask the archive itself which CSV it contains instead of scanning the
    # working directory afterwards: a stale "*C25.csv" left over from an
    # earlier run can then never be selected by mistake.
    csv_members = [name for name in z.namelist() if "C25.csv" in name]
    z.extractall()

print("ZIP extracted")

# --------------------------------------------------
# 3. Load the CSV into a DataFrame
# --------------------------------------------------
# Locate the CSV file extracted from the archive
if not csv_members:
    raise FileNotFoundError(f"No file matching 'C25.csv' found in {zip_path}")
# /!\ Always select the first matching file
filePath = csv_members[0]

# Format is special: European decimal with ISO-8859-1 encoding.
# DEP is read as text so that department codes keep their leading zero ("01").
# NOTE(review): the file yields an all-empty 'Unnamed: 13' column (visible in
# famStock.info()) - presumably a trailing ';' on each row; it is kept as is.
famStock = pd.read_csv(filePath, sep=";", encoding="ISO-8859-1",
                       decimal=",", low_memory=False, dtype={'DEP': object})

# UnitTest: the message is still printed, but a failed check now stops the
# notebook instead of letting later cells run on an empty frame.
UT_loadCsv = famStock.size > 1
print(f"Unit Test load csv : {UT_loadCsv}")
assert UT_loadCsv, f"{filePath} was loaded but contains no data"

# --------------------------------------------------
# 4. Save it for next use
# --------------------------------------------------

famStock.to_csv('famStocks.csv',  index = False)

print("Database is saved")

ZIP downloaded
ZIP extracted
Unit Test load csv : True
Database is saved


### Data information

In [6]:
# Name of the CSV file that the import cell selected and loaded.
filePath

'SCR-GRC-histDEP_collecte_stock_depuis_2000-C25.csv'

In [12]:
# Structural summary: column dtypes, non-null counts and memory footprint.
famStock.info()
# Only the last expression of a cell is rendered automatically, so the preview
# must be displayed explicitly; a bare `famStock.head()` here is discarded.
display(famStock.head())
# Column labels (final expression, rendered as the cell's output).
famStock.columns

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 329941 entries, 0 to 329940
Data columns (total 14 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   ESPECES         329941 non-null  object 
 1   DEP             329941 non-null  object 
 2   DEPARTEMENT     329941 non-null  object 
 3   REGION          329941 non-null  object 
 4   ANNEE           329941 non-null  int64  
 5   CAMPAGNE        329941 non-null  object 
 6   MOIS            329941 non-null  int64  
 7   TOTAL_COLLECTE  329941 non-null  float64
 8   STOCKS          329941 non-null  float64
 9   STOCKS_DEPOTS   329941 non-null  float64
 10  ENTREE_DEPOT    329941 non-null  float64
 11  SORTIE_DEPOT    329941 non-null  float64
 12  REPRISE_DEPOT   329941 non-null  float64
 13  Unnamed: 13     0 non-null       float64
dtypes: float64(7), int64(2), object(5)
memory usage: 35.2+ MB


Index(['ESPECES', 'DEP', 'DEPARTEMENT', 'REGION', 'ANNEE', 'CAMPAGNE', 'MOIS',
       'TOTAL_COLLECTE', 'STOCKS', 'STOCKS_DEPOTS', 'ENTREE_DEPOT',
       'SORTIE_DEPOT', 'REPRISE_DEPOT', 'Unnamed: 13'],
      dtype='object')

## Data Visualisation
Ici, vous réaliserez vos observations, créerez vos graphes dans un premier temps. Une fois qu'ils seront faits, ils seront à intégrer à votre application Streamlit. Considérez donc cet espace comme un bac à sable avant de vous lancer dans le développement de l'application de Data Visualisation.

In [None]:
# Here, you will build the visualisation of your dataset



Unnamed: 0,0,1,2,3,4
ESPECES,Avoine,Avoine,Avoine,Avoine,Avoine
DEP,01,01,01,01,01
DEPARTEMENT,AIN,AIN,AIN,AIN,AIN
REGION,AUVERGNE RHONE ALPES,AUVERGNE RHONE ALPES,AUVERGNE RHONE ALPES,AUVERGNE RHONE ALPES,AUVERGNE RHONE ALPES
ANNEE,2000,2000,2000,2000,2000
CAMPAGNE,2000/01,2000/01,2000/01,2000/01,2000/01
MOIS,7,8,9,10,11
TOTAL_COLLECTE,2245.9,461.6,76.8,21.7,28.4
STOCKS,2642.7,2886.8,2495.0,2196.9,1976.2
STOCKS_DEPOTS,151.0,198.4,193.2,191.1,183.9
