# Initialize

In [1]:
import requests
import mysql.connector
import os

In [2]:
# API health check: call the API's /health endpoint and print the decoded JSON.
# "api" on port 80 is the service's internal address from inside Docker.
response = requests.get("http://api:80/health")
print(response.json())

{'status': 'OK'}


In [3]:
# Dataset column names, listed in the order used by the INSERT statements
# in this notebook.
columns = [
    "Elevation",
    "Aspect",
    "Slope",
    "Horizontal_Distance_To_Hydrology",
    "Vertical_Distance_To_Hydrology",
    "Horizontal_Distance_To_Roadways",
    "Hillshade_9am",
    "Hillshade_Noon",
    "Hillshade_3pm",
    "Horizontal_Distance_To_Fire_Points",
    "Wilderness_Area",
    "Soil_Type",
    "Cover_Type",
]

In [None]:
# The FastAPI service is named 'api' in docker-compose and runs on port 80.
# Request the data for group 10 from its /data endpoint and print the decoded
# JSON body.
url = "http://api:80/data"
params = {"group_number": 10}

response = requests.get(url, params=params)
print(response.json())

# Database Functions

## Create cursor

In [5]:
def get_mysql_cursor():
    """
    Open a MySQL connection configured from environment variables.

    Reads MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWORD and
    MYSQL_DATABASE. MYSQL_PORT falls back to 3306 (the standard MySQL port)
    when it is unset or empty, rather than failing on int(None).

    Returns:
        A ``(cursor, connection)`` tuple. The caller must close both.

    Raises:
        ValueError: If MYSQL_PORT is set to a non-integer value.
        Any exception raised by mysql.connector.connect() is propagated.
    """
    # `or` covers both an unset variable (None) and an empty string.
    port = int(os.getenv("MYSQL_PORT") or 3306)

    conn = mysql.connector.connect(
        host=os.getenv("MYSQL_HOST"),
        port=port,
        user=os.getenv("MYSQL_USER"),
        password=os.getenv("MYSQL_PASSWORD"),
        database=os.getenv("MYSQL_DATABASE"),
    )
    try:
        cursor = conn.cursor()
    except Exception:
        # Do not leak the open connection if the cursor cannot be created.
        conn.close()
        raise

    # Returns cursor and connection so the caller can commit and close.
    return cursor, conn


## Create Table

### TODO: add parameter 'table_name' to create 2 tables: raw and clean data

In [24]:
def create_covertype_table(table_name="covertype_data"):
    """
    Create a cover-type table in the MySQL database if it does not already exist.

    The connection is obtained from get_mysql_cursor() and is always closed
    before returning.

    Args:
        table_name: Name of the table to create. Defaults to "covertype_data",
            so existing zero-argument calls are unaffected. Passing other
            names allows several tables with the same schema (for example one
            for raw data and one for clean data).

    Raises:
        ValueError: If table_name is not a string of 1-64 ASCII letters,
            digits or underscores.
    """
    import re  # local import: only needed for the identifier check below

    # Table names cannot be sent as bound query parameters, so the name is
    # interpolated into the SQL text. Restrict it to a plain identifier first
    # so that no SQL can be injected through it.
    if not isinstance(table_name, str) or not re.fullmatch(
        r"[A-Za-z0-9_]{1,64}", table_name
    ):
        raise ValueError(f"Invalid table name: {table_name!r}")

    cursor, conn = get_mysql_cursor()

    create_table_sql = f"""
    CREATE TABLE IF NOT EXISTS `{table_name}` (
        id INT AUTO_INCREMENT PRIMARY KEY,
        Elevation INT,
        Aspect INT,
        Slope INT,
        Horizontal_Distance_To_Hydrology INT,
        Vertical_Distance_To_Hydrology INT,
        Horizontal_Distance_To_Roadways INT,
        Hillshade_9am INT,
        Hillshade_Noon INT,
        Hillshade_3pm INT,
        Horizontal_Distance_To_Fire_Points INT,
        Wilderness_Area VARCHAR(20),
        Soil_Type VARCHAR(20),
        Cover_Type INT
    );
    """
    try:
        cursor.execute(create_table_sql)
        conn.commit()
        print(f"Table '{table_name}' created or already exists.")

    finally:
        cursor.close()
        conn.close()



In [25]:
# Example usage: create the covertype_data table (the SQL uses IF NOT EXISTS,
# so an existing table is left as is).
create_covertype_table()

Table 'covertype_data' created or already exists.


## Delete table

In [11]:
def delete_covertype_table():
    """
    Drop the covertype_data table from the MySQL database.

    The statement uses DROP TABLE IF EXISTS, so a missing table is not an
    error. The connection comes from get_mysql_cursor() and is closed before
    returning, whether or not the statement succeeds.
    """
    db_cursor, db_conn = get_mysql_cursor()
    try:
        db_cursor.execute("DROP TABLE IF EXISTS covertype_data;")
        db_conn.commit()
        print("Table 'covertype_data' deleted if it existed.")
    finally:
        # Always release the cursor and the connection.
        db_cursor.close()
        db_conn.close()


In [23]:
# Drop the covertype_data table if it exists.
delete_covertype_table()

Table 'covertype_data' deleted if it existed.


## Get data - Insert data

In [15]:
def fetch_and_insert_covertype_data(group_number=1):
    """
    Fetch one group of records from the API and insert them into covertype_data.

    Every fetched row is inserted; no duplicate check is performed. All rows
    are committed together, so a failure while inserting leaves the table
    unchanged.

    Args:
        group_number: Value sent as the ``group_number`` query parameter of
            the API's /data endpoint. Defaults to 1.

    Raises:
        requests.exceptions.HTTPError: If the API answers with an error status.
        KeyError: If the JSON response has no "data" key.
    """
    # Fetch data from API
    url = "http://api:80/data"
    response = requests.get(url, params={"group_number": group_number})
    response.raise_for_status()
    # Adjust if your API response structure is different
    rows = [tuple(row) for row in response.json()["data"]]

    cursor, conn = get_mysql_cursor()

    # Insert data into covertype_data table
    insert_sql = """
        INSERT INTO covertype_data (
            Elevation, Aspect, Slope, Horizontal_Distance_To_Hydrology,
            Vertical_Distance_To_Hydrology, Horizontal_Distance_To_Roadways,
            Hillshade_9am, Hillshade_Noon, Hillshade_3pm,
            Horizontal_Distance_To_Fire_Points, Wilderness_Area,
            Soil_Type, Cover_Type
        ) VALUES (
            %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
        )
    """
    try:
        if rows:
            # Send the whole batch in one call instead of one round trip
            # per row.
            cursor.executemany(insert_sql, rows)
        conn.commit()
        print(f"Inserted {len(rows)} rows into covertype_data.")
    finally:
        cursor.close()
        conn.close()


In [16]:
# Example usage: fetch group 1 from the API and insert its rows into
# covertype_data.
fetch_and_insert_covertype_data(group_number=1)

Inserted 1162 rows into covertype_data.


## Read Table - fetch 5 random records

In [17]:
def fetch_random_5_covertype_records():
    """
    Read five randomly ordered rows from covertype_data.

    Each row is printed as it is returned by the cursor.

    Returns:
        The rows as returned by ``cursor.fetchall()`` (at most five).
    """
    random_rows_sql = """
        SELECT * FROM covertype_data
        ORDER BY RAND()
        LIMIT 5;
    """

    db_cursor, db_conn = get_mysql_cursor()
    try:
        db_cursor.execute(random_rows_sql)
        sample = db_cursor.fetchall()
        for record in sample:
            print(record)
        return sample
    finally:
        # Runs on success and on failure, so nothing is left open.
        db_cursor.close()
        db_conn.close()


In [None]:
# Print (and return) five randomly chosen rows from covertype_data.
fetch_random_5_covertype_records()

## Read Table - get all data

In [20]:
def fetch_all_covertype_records():
    """
    Read every row of covertype_data and print how many rows were found.

    Returns:
        All rows, as returned by ``cursor.fetchall()``.
    """
    db_cursor, db_conn = get_mysql_cursor()
    try:
        db_cursor.execute("SELECT * FROM covertype_data;")
        all_rows = db_cursor.fetchall()
        row_count = len(all_rows)
        print(f"Total records in covertype_data: {row_count}")
        return all_rows
    finally:
        # Release the cursor and connection even if the query fails.
        db_cursor.close()
        db_conn.close()


In [None]:
# Read the whole covertype_data table; the function prints the total row count.
fetch_all_covertype_records()

# API Functions

## Get batch from API

In [30]:
def get_data_from_api(group_number):
    """
    Request one batch of records from the API's /data endpoint.

    Args:
        group_number: Value sent as the ``group_number`` query parameter.

    Returns:
        The non-empty value stored under the "data" key of the JSON response.

    Raises:
        requests.exceptions.RequestException: If the request fails or the API
            answers with an error status. The error is printed, then re-raised.
        ValueError: If the response holds no data (missing or empty "data").
            The error is printed, then re-raised.
    """
    try:
        api_response = requests.get(
            "http://api:80/data",
            params={"group_number": group_number},
        )
        api_response.raise_for_status()

        records = api_response.json().get("data", [])
        print(f"Number of records retrieved on this batch: {len(records)}")

        if records:
            return records
        # An empty batch is how the end of the data is detected here.
        raise ValueError("No more data available from the API.")

    except requests.exceptions.RequestException as request_error:
        print(f"API request failed: {request_error}")
        raise

    except ValueError as value_error:
        print(f"API response error: {value_error}")
        raise


In [31]:
# Fetch a single batch from the API; batch_data is inserted into the table
# in a later cell.
group_number = 1  # there are 10 groups in total
batch_data = get_data_from_api(group_number)

Number of records retrieved on this batch: 1162


## Insert batch into Table (with data validation - duplicates)

### TODO: include parameter 'table_name' to save raw data in 'raw_data_table'

In [32]:
def insert_unique_covertype_data(data):
    """
    Insert only the rows of ``data`` that are not already in covertype_data.

    A row counts as a duplicate when all thirteen data columns match an
    existing row. The comparison uses MySQL's NULL-safe equality operator
    (``<=>``), so a row containing missing values (None/NULL) is also
    recognised as a duplicate; with plain ``=`` such a row would never match
    and would be inserted again on every call.

    All inserts are committed together at the end, and the number of newly
    inserted rows is printed.

    Args:
        data: Iterable of rows, each holding the thirteen column values in
            the order Elevation, Aspect, Slope,
            Horizontal_Distance_To_Hydrology, Vertical_Distance_To_Hydrology,
            Horizontal_Distance_To_Roadways, Hillshade_9am, Hillshade_Noon,
            Hillshade_3pm, Horizontal_Distance_To_Fire_Points,
            Wilderness_Area, Soil_Type, Cover_Type.
    """
    cursor, conn = get_mysql_cursor()

    # Query that counts existing rows identical to the candidate row.
    # `<=>` behaves like `=` but also treats NULL <=> NULL as a match.
    check_sql = """
        SELECT COUNT(*) FROM covertype_data
        WHERE Elevation <=> %s AND Aspect <=> %s AND Slope <=> %s AND
              Horizontal_Distance_To_Hydrology <=> %s AND Vertical_Distance_To_Hydrology <=> %s AND
              Horizontal_Distance_To_Roadways <=> %s AND Hillshade_9am <=> %s AND
              Hillshade_Noon <=> %s AND Hillshade_3pm <=> %s AND
              Horizontal_Distance_To_Fire_Points <=> %s AND Wilderness_Area <=> %s AND
              Soil_Type <=> %s AND Cover_Type <=> %s
    """

    insert_sql = """
        INSERT INTO covertype_data (
            Elevation, Aspect, Slope, Horizontal_Distance_To_Hydrology,
            Vertical_Distance_To_Hydrology, Horizontal_Distance_To_Roadways,
            Hillshade_9am, Hillshade_Noon, Hillshade_3pm,
            Horizontal_Distance_To_Fire_Points, Wilderness_Area,
            Soil_Type, Cover_Type
        ) VALUES (
            %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
        )
    """

    inserted_count = 0

    try:
        for row in data:
            values = tuple(row)  # convert once, reuse for both statements
            cursor.execute(check_sql, values)
            exists = cursor.fetchone()[0]

            # Rows inserted earlier in this loop are visible to this check
            # because it runs on the same connection, so duplicates inside
            # the batch are skipped as well.
            if exists == 0:
                cursor.execute(insert_sql, values)
                inserted_count += 1

        conn.commit()
        print(f"Inserted {inserted_count} new rows into covertype_data.")
    finally:
        cursor.close()
        conn.close()


In [34]:
# Insert the rows of batch_data that are not already in covertype_data.
insert_unique_covertype_data(batch_data)

Inserted 1162 new rows into covertype_data.


In [36]:
# Read the whole table back; the function prints the total row count.
results = fetch_all_covertype_records()

Total records in covertype_data: 1162
