# Data collection using an API

In [1]:
# importing the necessary libraries
import requests # importing the requests library to make the API call to the server
import datetime # importing the datetime library to work with the date and time
from datetime import datetime, timedelta # importing the datetime and timedelta from the datetime library
import csv # importing the csv library to work with the csv files

In [2]:
# creating a dictionary to store the environment variables
my_dict = {}

# reading the KEY=VALUE pairs from the .env file
with open(".env", "r") as f:
    for line in f:
        line = line.strip()
        # skipping blank lines and '#' comment lines, which hold no assignment
        if not line or line.startswith('#'):
            continue
        # splitting on the first '=' only, so a value that itself contains '='
        # (e.g. a base64-padded key) is kept intact instead of raising ValueError
        key, separator, val = line.partition('=')
        if not separator:
            # no '=' on this line, so there is nothing to store
            continue
        my_dict[key.strip()] = val.strip()

# storing the api key in a variable (raises KeyError if .env has no 'api_key' entry)
api_key = my_dict['api_key']

In [3]:
# endpoint of the one-way flight search that every request in this notebook targets
url = (
    "https://skyscanner80.p.rapidapi.com"
    "/api/v1/flights/search-one-way"
)

# key and host headers sent along with each request
headers = {}
headers["X-RapidAPI-Key"] = api_key
headers["X-RapidAPI-Host"] = "skyscanner80.p.rapidapi.com"

# creating a csv file to store the data
csv_file_name = 'flight_data_aar.csv'

# opening the csv file once and keeping it open while every day is fetched;
# the encoding is set explicitly because the results are requested in the da-DK
# locale and may contain non-ASCII characters the platform default cannot encode
with open(csv_file_name, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # writing the headers to the csv file
    # NOTE(review): the 'Search Date' column is filled with the departure date
    # that was queried, not the date the script was run - confirm this is intended
    writer.writerow(['Search Date', 'Airline', 'Flight Number', 'Source City', 'Departure Time', 'Number of Stops', 'Arrival Time', 'Destination City', 'Duration (Minutes)', 'Price'])

    # using today's date as the starting point
    start_date = datetime.now()

    # looping over the next 30 days (today included)
    for offset in range(30):
        # calculating the departure date for the current iteration
        depart_date = start_date + timedelta(days=offset)
        # formatting the date in YYYY-MM-DD format
        formatted_date = depart_date.strftime('%Y-%m-%d')

        # building the query string for this departure date
        querystring = {
            "fromId": "eyJzIjoiQUFSIiwiZSI6Ijk1NjczOTg3IiwiaCI6IjI3NTQ3NDY0In0=",
            "toId": "eyJzIjoiTE9ORCIsImUiOiIyNzU0NDAwOCIsImgiOiIyNzU0NDAwOCJ9",
            "departDate": formatted_date,
            "adults": "1",
            "currency": "DKK",
            "market": "DK",
            "locale": "da-DK"
        }

        # making the request; the timeout keeps a stalled connection from hanging
        # the run, and a network error only skips this day instead of aborting
        try:
            response = requests.get(url, headers=headers, params=querystring, timeout=30)
        except requests.RequestException as error:
            print(f"Failed to fetch data for {formatted_date}: {error}")
            continue

        # printing an error message if the request was not successful
        if response.status_code != 200:
            print(f"Failed to fetch data for {formatted_date}: {response.status_code}")
            continue

        # a 200 response with a body that is not JSON raises a ValueError subclass
        try:
            data = response.json()
        except ValueError as error:
            print(f"Failed to fetch data for {formatted_date}: {error}")
            continue

        # tolerating a missing or null 'data' / 'itineraries' entry
        payload = data.get('data') if isinstance(data, dict) else None
        itineraries = (payload.get('itineraries') if isinstance(payload, dict) else None) or []

        # processing each itinerary
        for itinerary in itineraries:
            try:
                price = itinerary['price']['formatted']
                # collecting the rows first so an itinerary is written either
                # completely or not at all
                rows = []
                for leg in itinerary['legs']:
                    rows.append([
                        formatted_date,
                        leg['carriers']['marketing'][0]['name'],  # first marketing carrier
                        leg['segments'][0]['flightNumber'],       # flight number of the first segment
                        leg['origin']['city'],
                        leg['departure'],
                        leg['stopCount'],
                        leg['arrival'],
                        leg['destination']['city'],
                        leg['durationInMinutes'],
                        price,
                    ])
            except (KeyError, IndexError, TypeError) as error:
                # one malformed itinerary should not stop the whole collection
                print(f"Skipping malformed itinerary for {formatted_date}: {error!r}")
                continue

            # writing the details to the CSV
            writer.writerows(rows)