In [7]:
import matplotlib.pyplot as plt
import os
import pandas as pd
from zipfile import ZipFile
import requests
from io import BytesIO

class FlightDataProcessor:
    def __init__(self, gitlab_url):
        # Download the zip file from the GitLab URL
        response = requests.get(gitlab_url)
    
        zip_file = ZipFile(BytesIO(response.content))

        # Set the destination folder for the extracted data
        destination_folder = os.path.join('group_03', 'downloads1')

        # Ensure the destination folder exists
        os.makedirs(destination_folder, exist_ok=True)

        # Extract the datasets from the zip folder
        zip_file.extractall(destination_folder)

        # Read the datasets into pandas dataframes
        self.airlines_df = pd.read_csv(os.path.join(destination_folder, 'airlines.csv'))
        self.airplanes_df = pd.read_csv(os.path.join(destination_folder, 'airplanes.csv'))
        self.airports_df = pd.read_csv(os.path.join(destination_folder, 'airports.csv'))
        self.routes_df = pd.read_csv(os.path.join(destination_folder, 'routes.csv'))

    def plot_top_airplane_models(self, country_subset=None, top_n=5):
        # Merge routes_df and airplanes_df on 'airplane_id'
        merged_df = pd.merge(self.routes_df, self.airplanes_df, left_on='airplane_id', right_on='airplane_id', how='left')

        # Filter by country_subset if provided
        if country_subset is not None:
            merged_df = merged_df[merged_df['source_country'].isin(country_subset) | merged_df['destination_country'].isin(country_subset)]

        # Group by airplane model and count routes
        airplane_counts = merged_df['airplane_model'].value_counts().nlargest(top_n)

        # Plot the results
        plt.figure(figsize=(10, 6))
        airplane_counts.plot(kind='bar', color='skyblue')
        plt.title(f'Top {top_n} Airplane Models by Number of Routes')
        plt.xlabel('Airplane Model')
        plt.ylabel('Number of Routes')
        plt.show()

# GitLab URL for the raw flight_data.zip file
gitlab_url = 'https://gitlab.com/adpro9641208/group_03/-/raw/main/flight_data.zip'

# Create an instance of FlightDataProcessor with the GitLab URL
flight_processor = FlightDataProcessor(gitlab_url)

# Example Usage
# Assuming you want to plot the top 7 airplane models for a specific country subset
flight_processor.plot_top_airplane_models(country_subset=['USA', 'Canada'], top_n=7)


KeyError: 'airplane_id'