### Step 1: Install Kaggle
First, install the Kaggle library using pip. Open a Command Prompt or PowerShell and run:

```
pip install kaggle
```

### Step 2: Get Your Kaggle API Token

1. Go to the Kaggle website and log in.
2. Click on your profile picture in the top right corner and select "My Account".
3. Scroll down to the "API" section and click on "Create New API Token". This downloads a file named `kaggle.json` to your computer.

### Step 3: Set Up the Kaggle API Token on Windows
Place the `kaggle.json` file in the directory where the Kaggle client looks for it.
On Windows, this is usually `C:\Users\<Your_Username>\.kaggle`.
If the `.kaggle` directory does not exist, create it.

In [1]:
import os
import kaggle
import zipfile
import pandas as pd

import warnings

# Suppress all warnings: with no category or message filter given, the
# 'ignore' action applies to every warning issued from this point on.
warnings.filterwarnings('ignore')

# Set up the Kaggle API client.
# NOTE(review): presumably this reads the credentials from the kaggle.json
# file described in the setup steps and fails if it is missing -- confirm
# against the installed kaggle package version.
kaggle.api.authenticate()

### Download Road Accidents In France Datasets Using Kaggle API

In [2]:
def download_kaggle_dataset(dataset, file_name, save_path):
    """
    Download a specific file from a Kaggle dataset and unzip it if needed.

    The file is requested through the Kaggle API client. If the download
    arrives as ``<file_name>.zip`` inside ``save_path``, the archive is
    extracted there and then deleted. The function only reports success
    when the expected file is actually present on disk afterwards.

    Args:
        dataset (str): The Kaggle dataset identifier (e.g., 'username/dataset-name').
        file_name (str): The name of the file to download from the dataset.
        save_path (str): The directory where the dataset will be saved.
            It is created if it does not exist yet.

    Returns:
        str: The file path of the downloaded file, or None if the download
        or extraction fails, or if the file is missing after the download.

    Note:
        No exception propagates to the caller: any error is caught, reported
        with a printed message, and turned into a ``None`` return value.
    """
    try:
        # Make sure the target directory exists before anything is written to it
        os.makedirs(save_path, exist_ok=True)

        # Download the dataset
        kaggle.api.dataset_download_file(dataset, file_name, path=save_path)
        print(f"Dataset {file_name} downloaded successfully.")

        # Construct the path to the downloaded zip file
        zip_file_path = os.path.join(save_path, f"{file_name}.zip")

        # Not every file is delivered as an archive, so only unzip when one exists
        if os.path.exists(zip_file_path):
            with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
                zip_ref.extractall(save_path)
            print(f"Dataset {file_name} unzipped successfully.")
            os.remove(zip_file_path)  # Remove the zip file after extraction

        # Construct the file path
        file_path = os.path.join(save_path, file_name)

        # Never hand back a path that does not exist: callers treat any
        # non-None result as a usable file.
        if not os.path.isfile(file_path):
            print(
                f"Error downloading the dataset: {file_name} was not found "
                f"in {save_path} after the download."
            )
            return None

        return file_path

    except Exception as e:
        print(f"Error downloading the dataset: {e}")
        return None


In [3]:
# Kaggle dataset identifier and the individual CSV files to fetch from it
dataset = 'ahmedlahlou/accidents-in-france-from-2005-to-2016'
file_names = ['caracteristics.csv', 'holidays.csv', 'places.csv', 'users.csv', 'vehicles.csv']
# Local directory that receives the downloaded files
save_path = './src/data/accidents_data'

# Fetch the files one at a time and report the outcome of each download
for file_name in file_names:
    file_path = download_kaggle_dataset(dataset, file_name, save_path)
    if file_path is None:
        print(f"Failed to download {file_name}")
        continue
    print(f"{file_name} downloaded and saved to {file_path}")


Dataset URL: https://www.kaggle.com/datasets/ahmedlahlou/accidents-in-france-from-2005-to-2016
Dataset caracteristics.csv downloaded successfully.
Dataset caracteristics.csv unzipped successfully.
caracteristics.csv downloaded and saved to ./src/data/accidents_data\caracteristics.csv
Dataset URL: https://www.kaggle.com/datasets/ahmedlahlou/accidents-in-france-from-2005-to-2016
Dataset holidays.csv downloaded successfully.
holidays.csv downloaded and saved to ./src/data/accidents_data\holidays.csv
Dataset URL: https://www.kaggle.com/datasets/ahmedlahlou/accidents-in-france-from-2005-to-2016
Dataset places.csv downloaded successfully.
Dataset places.csv unzipped successfully.
places.csv downloaded and saved to ./src/data/accidents_data\places.csv
Dataset URL: https://www.kaggle.com/datasets/ahmedlahlou/accidents-in-france-from-2005-to-2016
Dataset users.csv downloaded successfully.
Dataset users.csv unzipped successfully.
users.csv downloaded and saved to ./src/data/accidents_data\users.

### Create DataFrame from CSV file

In [4]:

def load_csv_to_dataframe(file_path):
    """
    Read a CSV file (decoded as ISO-8859-1) into a pandas DataFrame.

    Args:
        file_path (str): Location of the CSV file to read.

    Returns:
        pd.DataFrame or None:
            The parsed DataFrame on success, or None when reading fails.

    Note:
        Nothing is raised to the caller. Every failure is caught, reported
        with a printed message, and turned into a None return value:
        a missing file (FileNotFoundError), an empty file
        (pd.errors.EmptyDataError), unparsable content
        (pd.errors.ParserError), or any other unexpected Exception.
    """
    try:
        loaded = pd.read_csv(file_path, encoding='ISO-8859-1')
        print(f"DataFrame loaded successfully from {file_path}")
        return loaded
    except Exception as err:
        # Pick the message for the most specific known failure first and
        # fall back to the generic one for anything else.
        if isinstance(err, FileNotFoundError):
            message = f"Error: The file at {file_path} does not exist."
        elif isinstance(err, pd.errors.EmptyDataError):
            message = "Error: The file is empty."
        elif isinstance(err, pd.errors.ParserError):
            message = "Error: The file could not be parsed."
        else:
            message = f"An unexpected error occurred: {err}"
        print(message)
        return None

In [5]:
# path to datasets
# Each dataset: the location of its CSV file, followed by the DataFrame
# loaded from it (load_csv_to_dataframe yields None if loading fails).

file_path_caracteristics = 'src/data/accidents_data/caracteristics.csv'
caracteristics_df = load_csv_to_dataframe(file_path_caracteristics)

file_path_holidays = 'src/data/accidents_data/holidays.csv'
holidays_df = load_csv_to_dataframe(file_path_holidays)

file_path_places = 'src/data/accidents_data/places.csv'
places_df = load_csv_to_dataframe(file_path_places)

file_path_users = 'src/data/accidents_data/users.csv'
users_df = load_csv_to_dataframe(file_path_users)

file_path_vehicles = 'src/data/accidents_data/vehicles.csv'
vehicles_df = load_csv_to_dataframe(file_path_vehicles)

DataFrame loaded successfully from src/data/accidents_data/caracteristics.csv
DataFrame loaded successfully from src/data/accidents_data/holidays.csv
DataFrame loaded successfully from src/data/accidents_data/places.csv
DataFrame loaded successfully from src/data/accidents_data/users.csv
DataFrame loaded successfully from src/data/accidents_data/vehicles.csv


In [6]:
caracteristics_df.head()

Unnamed: 0,Num_Acc,an,mois,jour,hrmn,lum,agg,int,atm,col,com,adr,gps,lat,long,dep
0,201600000001,16,2,1,1445,1,2,1,8.0,3.0,5.0,"46, rue Sonneville",M,0.0,0.0,590
1,201600000002,16,3,16,1800,1,2,6,1.0,6.0,5.0,1a rue du cimetière,M,0.0,0.0,590
2,201600000003,16,7,13,1900,1,1,1,1.0,6.0,11.0,,M,0.0,0.0,590
3,201600000004,16,8,15,1930,2,2,1,7.0,3.0,477.0,52 rue victor hugo,M,0.0,0.0,590
4,201600000005,16,12,23,1100,1,2,3,1.0,3.0,11.0,rue Joliot curie,M,0.0,0.0,590
