In [4]:
from bs4 import BeautifulSoup
import requests
from minio import Minio
from io import BytesIO
from datetime import datetime

def setup_minio_client():
    """Build a MinIO client and make sure the 'bronze' bucket exists.

    Connection settings are taken from the environment so credentials do not
    have to live in the notebook:

    * ``MINIO_ENDPOINT``   (default ``host.docker.internal:9000``)
    * ``MINIO_ACCESS_KEY`` (default ``ROOTUSER``)
    * ``MINIO_SECRET_KEY`` (default ``DATAINCUBATOR``)

    The defaults are the previous hard-coded development values, so the
    function behaves as before when none of the variables are set.

    Returns:
        The configured ``Minio`` client (plain HTTP, ``secure=False``).
    """
    # Local import keeps this cell self-contained without touching the
    # notebook's import cell.
    import os

    minio_client = Minio(
        os.environ.get('MINIO_ENDPOINT', 'host.docker.internal:9000'),
        access_key=os.environ.get('MINIO_ACCESS_KEY', 'ROOTUSER'),
        secret_key=os.environ.get('MINIO_SECRET_KEY', 'DATAINCUBATOR'),
        secure=False,
    )
    # Only create the bucket (and report it) when it is not already there.
    if not minio_client.bucket_exists('bronze'):
        minio_client.make_bucket('bronze')
        print("Bucket 'bronze' created successfully")
    return minio_client
# Shared client used by main() in a later cell; building it also runs the
# 'bronze' bucket check inside setup_minio_client().
minio_client = setup_minio_client()

Bucket 'bronze' created successfully


In [None]:
def scrape_flight_data(url="https://www.flightradar24.com/data/airports/jfk", timeout=10):
    """Fetch a flight listing page and extract one record per table row.

    Rows are looked up as ``<tr class="data-row">`` and each field as a
    ``<td>`` with a matching class name.
    NOTE(review): whether the target page actually serves this markup in its
    static HTML is not verifiable from this notebook — confirm against the
    live page.

    Args:
        url: Page to scrape. Defaults to the JFK airport page.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        A list of dicts with the keys ``flight_number``, ``origin``,
        ``destination``, ``departure_time`` and ``arrival_time`` (possibly
        empty), or ``None`` when the page could not be fetched.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Network-level failures are reported the same way as HTTP failures.
        print(f"Failed to fetch page: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch page, status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Output key -> CSS class of the <td> holding that value.
    cell_classes = {
        'flight_number': 'flight-number',
        'origin': 'origin',
        'destination': 'destination',
        'departure_time': 'departure-time',
        'arrival_time': 'arrival-time',
    }

    flight_data = []
    for row in soup.find_all('tr', class_='data-row'):
        cells = {
            field: row.find('td', class_=css_class)
            for field, css_class in cell_classes.items()
        }
        # find() returns None for a missing cell; skip incomplete rows
        # instead of crashing on `.text` and losing the whole scrape.
        if any(cell is None for cell in cells.values()):
            continue
        flight_data.append(
            {field: cell.text.strip() for field, cell in cells.items()}
        )

    return flight_data


In [None]:
import csv

def save_flight_data_to_minio(flight_data, minio_client):
    """Serialize flight records to CSV and upload them to the 'bronze' bucket.

    The object is named ``flights_data_<YYYYMMDD>.csv`` using today's local
    date, so a second upload on the same day reuses the same object name.

    Args:
        flight_data: Iterable of dicts with the keys ``flight_number``,
            ``origin``, ``destination``, ``departure_time`` and
            ``arrival_time``.
        minio_client: Object exposing
            ``put_object(bucket, name, data, length, ...)``.

    Upload errors are caught and printed rather than raised.
    """
    # Local import: the notebook's import cell only brings in BytesIO.
    from io import StringIO

    current_datetime = datetime.now().strftime('%Y%m%d')
    object_name = f'flights_data_{current_datetime}.csv'

    # The csv module writes str, so it needs a text buffer; writing straight
    # into a BytesIO raises TypeError on the header row.
    text_buffer = StringIO(newline='')
    writer = csv.DictWriter(
        text_buffer,
        fieldnames=["flight_number", "origin", "destination", "departure_time", "arrival_time"],
    )
    writer.writeheader()
    writer.writerows(flight_data)

    # Encode once so the uploaded stream and the reported length agree.
    payload = text_buffer.getvalue().encode('utf-8')

    try:
        minio_client.put_object(
            'bronze', object_name, BytesIO(payload), len(payload),
            content_type='text/csv')
        print(f"Flight data saved successfully as {object_name}")
    except Exception as e:
        print("An error occurred while uploading to Minio:", e)


In [None]:
def main():
    """Scrape the flight listing and upload it when any rows were returned."""
    scraped = scrape_flight_data()

    # Both None (fetch failed) and an empty list mean there is nothing to save.
    if not scraped:
        return

    save_flight_data_to_minio(scraped, minio_client)


if __name__ == "__main__":
    main()
