In [None]:
import requests
import pandas as pd
import time

API_KEY = '' 
BASE_URL = 'http://api.aviationstack.com/v1/flights'

all_flights = []
limit = 100
total_records = 1000
pages = total_records // limit

for offset in range(0, total_records, limit):
    params = {
        'access_key': API_KEY,
        'limit': limit,
        'offset': offset
    }

    response = requests.get(BASE_URL, params=params)
    data = response.json()

    # Handle errors
    if 'data' not in data:
        print("Error fetching data:", data)
        break

    for flight in data['data']:
        all_flights.append({
            'airline': flight['airline']['name'],
            'flight_number': flight['flight']['number'],
            'departure_airport': flight['departure']['airport'],
            'arrival_airport': flight['arrival']['airport'],
            'departure_iata': flight['departure']['iata'],
            'arrival_iata': flight['arrival']['iata'],
            'departure_time': flight['departure']['scheduled'],
            'arrival_time': flight['arrival']['scheduled'],
            'status': flight['flight_status']
        })

    print(f"Fetched {offset + limit} / {total_records} records...")
    time.sleep(1)  # Respect API rate limits

# Convert to DataFrame
df = pd.DataFrame(all_flights)
df['departure_time'] = pd.to_datetime(df['departure_time'])
df['arrival_time'] = pd.to_datetime(df['arrival_time'])

# Save to CSV
df.to_csv("aviationstack_1000_flights.csv", index=False)
print("Saved 1000 flight records to 'aviationstack_1000_flights.csv'")


Fetched 100 / 1000 records...
Fetched 200 / 1000 records...
Fetched 300 / 1000 records...
Fetched 400 / 1000 records...
Fetched 500 / 1000 records...
Fetched 600 / 1000 records...
Fetched 700 / 1000 records...
Fetched 800 / 1000 records...
Fetched 900 / 1000 records...
Fetched 1000 / 1000 records...
Saved 1000 flight records to 'aviationstack_1000_flights.csv'


In [14]:
import pandas as pd
df = pd.read_csv("aviationstack_1000_flights.csv", parse_dates=['departure_time'])

In [16]:
df.head()

Unnamed: 0,airline,flight_number,departure_airport,arrival_airport,departure_iata,arrival_iata,departure_time,arrival_time,status
0,Hainan Airlines,7082.0,Beijing Capital International,Haikou,PEK,HAK,2025-07-30 19:00:00+00:00,2025-07-30 22:45:00+00:00,scheduled
1,Air China LTD,1379.0,Beijing Capital International,Guangzhou Baiyun International,PEK,CAN,2025-07-30 19:00:00+00:00,2025-07-30 22:20:00+00:00,scheduled
2,Hainan Airlines,7567.0,Beijing Capital International,Chongqing Jiangbei International,PEK,CKG,2025-07-30 18:55:00+00:00,2025-07-30 21:40:00+00:00,scheduled
3,Air China LTD,709.0,Beijing Capital International,Seoul (Incheon),PEK,ICN,2025-07-30 18:50:00+00:00,2025-07-30 21:45:00+00:00,scheduled
4,Air China LTD,1907.0,Beijing Capital International,Zhuhai Airport,PEK,ZUH,2025-07-30 18:45:00+00:00,2025-07-30 22:00:00+00:00,scheduled


In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype              
---  ------             --------------  -----              
 0   airline            998 non-null    object             
 1   flight_number      982 non-null    float64            
 2   departure_airport  996 non-null    object             
 3   arrival_airport    994 non-null    object             
 4   departure_iata     1000 non-null   object             
 5   arrival_iata       1000 non-null   object             
 6   departure_time     1000 non-null   datetime64[ns, UTC]
 7   arrival_time       1000 non-null   object             
 8   status             1000 non-null   object             
dtypes: datetime64[ns, UTC](1), float64(1), object(7)
memory usage: 70.4+ KB
