In [None]:
# req 4.1 and 4.2 Accessing loan API endpoint to bring in data
import requests
import json

# Function to fetch loan data from the provided API URL
def fetch_loan_data(api_url, timeout=30):
    """Fetch loan data from ``api_url`` and return the decoded JSON payload.

    Args:
        api_url: URL of the endpoint that serves the loan data as JSON.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled server.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.get(api_url, timeout=timeout)
    # raise_for_status() raises for 4xx/5xx responses; every other status
    # falls through to JSON decoding instead of implicitly returning None.
    response.raise_for_status()
    return response.json()

def main():
    """Download the loan dataset and return the decoded JSON payload.

    Returns:
        Whatever ``fetch_loan_data`` returns for the loan dataset URL.
    """
    api_url = "https://raw.githubusercontent.com/platformps/LoanDataset/main/loan_data.json"  # URL of the loan data API
    # Hand the data back to the caller instead of binding it to an unused local.
    return fetch_loan_data(api_url)


if __name__ == "__main__":
    main()  # Execute the main function if this script is run directly


In [None]:
# 4.3 Loading the raw data from the API into the MySQL database
import json
import os

import mysql.connector
import requests
from pyspark.sql import SparkSession

# Step 1: Fetch data from the API
# API endpoint serving the loan application dataset as JSON
url = "https://raw.githubusercontent.com/platformps/LoanDataset/main/loan_data.json"

# Fetch data from the API. The timeout keeps the script from hanging on an
# unresponsive server; network-level failures are handled like a bad status
# so the script reaches the "no data" branch below instead of crashing.
try:
    response = requests.get(url, timeout=30)
except requests.RequestException as exc:
    print(f"Failed to fetch data from the API: {exc}")
    loan_data = None  # Nothing to load if the request itself failed
else:
    # Print the status code of the response
    print(f"Status Code: {response.status_code}")

    # Check if request was successful
    if response.status_code == 200:
        # Get the JSON data from the response
        loan_data = response.json()
    else:
        print("Failed to fetch data from the API")
        loan_data = None  # Set loan_data to None if the request failed

# Step 2: Load data into the RDBMS using PySpark
if loan_data is not None:
    # Create Spark session with the MySQL JDBC driver on the classpath
    spark = (
        SparkSession.builder
        .appName("LoanApplicationData")
        .config("spark.jars.packages", "mysql:mysql-connector-java:8.0.26")
        .getOrCreate()
    )

    try:
        # Convert JSON data to a DataFrame: serialize the payload back to a
        # JSON string and let Spark's JSON reader parse it.
        loan_data_df = spark.read.json(
            spark.sparkContext.parallelize([json.dumps(loan_data)])
        )

        # Define JDBC connection properties. Credentials are taken from the
        # environment when set; the fallbacks preserve the values this script
        # used before and should be replaced for any non-local database.
        jdbc_url = "jdbc:mysql://localhost:3306/creditcard_capstone"
        connection_properties = {
            "user": os.environ.get("MYSQL_USER", "root"),
            "password": os.environ.get("MYSQL_PASSWORD", "Password"),
            "driver": "com.mysql.cj.jdbc.Driver",
        }

        # Write DataFrame to the MySQL database, replacing any existing table
        loan_data_df.write.jdbc(
            url=jdbc_url,
            table="CDW_SAPP_loan_application",
            mode="overwrite",
            properties=connection_properties,
        )
    finally:
        # Stop the Spark session even if the conversion or the write fails
        spark.stop()
else:
    print("No data available to load into the database.")
