In [6]:
import requests
import pandas as pd

# Use the default API key
api_key = "c6d622616af71712b32b3d31695efb70"  # Default API key

# API endpoint for Current Weather Data API
city = "London"  # Replace with your desired city
api_url = f"http://api.openweathermap.org/data/2.5/weather?q={city}&appid={api_key}"

try:
    # Fetch data from the API
    response = requests.get(api_url)
    response.raise_for_status()  # Raise an exception for HTTP errors

    # Convert API response to JSON
    api_data = response.json()
    
    # Normalize the JSON data into a DataFrame
    df_api = pd.json_normalize(api_data)
    
    # Display the API data
    print("API Data:")
    print(df_api.head())

except requests.exceptions.HTTPError as http_err:
    print(f"HTTP error occurred: {http_err}")
except Exception as err:
    print(f"An error occurred: {err}")

API Data:
                                             weather      base  visibility  \
0  [{'id': 801, 'main': 'Clouds', 'description': ...  stations       10000   

           dt  timezone       id    name  cod  coord.lon  coord.lat  ...  \
0  1741762823         0  2643743  London  200    -0.1257    51.5085  ...   

   main.grnd_level  wind.speed  wind.deg  wind.gust  clouds.all  sys.type  \
0             1000        0.89       348       1.34          18         2   

    sys.id  sys.country  sys.sunrise  sys.sunset  
0  2091269           GB   1741760480  1741802358  

[1 rows x 27 columns]


In [7]:
import pandas as pd
import sqlite3
import requests

# Step 1: Extract - Load data from different file types and API
# CSV Data
csv_url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df_csv = pd.read_csv(csv_url)

# JSON Data
json_url = "https://jsonplaceholder.typicode.com/users"
df_json = pd.read_json(json_url)

# API Data (Weather Data)
api_key = "c6d622616af71712b32b3d31695efb70"  # Default API key
city = "London"  # Replace with your desired city
api_url = f"http://api.openweathermap.org/data/2.5/weather?q={city}&appid={api_key}"

try:
    response = requests.get(api_url)
    response.raise_for_status()  # Raise an exception for HTTP errors
    api_data = response.json()
    df_api = pd.json_normalize(api_data)
    print("API Data:")
    print(df_api.head())
except requests.exceptions.HTTPError as http_err:
    print(f"HTTP error occurred: {http_err}")
except Exception as err:
    print(f"An error occurred: {err}")

# Step 2: Transform - Data Cleaning and Merging
# Clean CSV data
df_csv_cleaned = df_csv[['PassengerId', 'Name', 'Sex', 'Age', 'Fare']].dropna()

# Clean JSON data
df_json_cleaned = df_json[['id', 'name', 'email']].rename(columns={'name': 'employee_name'})

# Merge datasets for demonstration (Join on mocked column)
df_csv_cleaned['id'] = df_csv_cleaned['PassengerId'] % 10  # Mock 'id' for merging
df_transformed = pd.merge(df_csv_cleaned, df_json_cleaned, on='id', how='inner')

# Step 3: Load - Create and Insert into SQLite Database
conn = sqlite3.connect('data_warehouse.db')

# Load transformed data into SQLite
df_transformed.to_sql('transformed_data', conn, if_exists='replace', index=False)

# Verify the data in the data warehouse
print("\nTransformed Data (ETL):")
print(pd.read_sql('SELECT * FROM transformed_data LIMIT 5', conn))

# Close connection
conn.close()

API Data:
                                             weather      base  visibility  \
0  [{'id': 801, 'main': 'Clouds', 'description': ...  stations       10000   

           dt  timezone       id    name  cod  coord.lon  coord.lat  ...  \
0  1741763444         0  2643743  London  200    -0.1257    51.5085  ...   

   main.grnd_level  wind.speed  wind.deg  wind.gust  clouds.all  sys.type  \
0             1000        0.89       355       2.24          18         2   

    sys.id  sys.country  sys.sunrise  sys.sunset  
0  2091269           GB   1741760480  1741802358  

[1 rows x 27 columns]

Transformed Data (ETL):
   PassengerId                                               Name     Sex  \
0            1                            Braund, Mr. Owen Harris    male   
1            2  Cumings, Mrs. John Bradley (Florence Briggs Th...  female   
2            3                             Heikkinen, Miss. Laina  female   
3            4       Futrelle, Mrs. Jacques Heath (Lily May Peel)  

In [11]:
import pandas as pd
import sqlite3
import requests

# Step 1: Extract - Load data from different file types and API
# CSV Data
csv_url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df_csv = pd.read_csv(csv_url)

# JSON Data
json_url = "https://jsonplaceholder.typicode.com/users"
df_json = pd.read_json(json_url)

# API Data (Weather Data)
api_key = "c6d622616af71712b32b3d31695efb70"  # Default API key
city = "London"  # Replace with your desired city
api_url = f"http://api.openweathermap.org/data/2.5/weather?q={city}&appid={api_key}"

try:
    response = requests.get(api_url)
    response.raise_for_status()  # Raise an exception for HTTP errors
    api_data = response.json()
    
    # Flatten the API data
    df_api = pd.json_normalize(api_data, sep='_')
    
    # Handle the 'weather' column (list of dictionaries)
    if 'weather' in df_api.columns:
        # Flatten the 'weather' column
        weather_data = pd.json_normalize(api_data['weather'], sep='_')
        
        # Rename columns to avoid conflicts
        weather_data = weather_data.rename(columns={
            'id': 'weather_id',
            'main': 'weather_main',
            'description': 'weather_description',
            'icon': 'weather_icon'
        })
        
        # Add a unique identifier column for merging
        weather_data['weather_unique_id'] = range(len(weather_data))
        df_api['weather_unique_id'] = range(len(df_api))
        
        # Merge the flattened weather data back into the main API data
        df_api = df_api.drop(columns=['weather']).merge(weather_data, on='weather_unique_id', how='left')
    
    print("API Data:")
    print(df_api.head())
except requests.exceptions.HTTPError as http_err:
    print(f"HTTP error occurred: {http_err}")
except Exception as err:
    print(f"An error occurred: {err}")

# Step 2: Load - Load raw data into SQLite Database
conn = sqlite3.connect('data_warehouse.db')

# Load raw CSV data into SQLite
df_csv.to_sql('raw_csv_data', conn, if_exists='replace', index=False)

# Flatten JSON data before loading into SQLite
df_json_flattened = pd.json_normalize(df_json.to_dict(orient='records'), sep='_')
df_json_flattened.to_sql('raw_json_data', conn, if_exists='replace', index=False)

# Load raw API data into SQLite
df_api.to_sql('raw_api_data', conn, if_exists='replace', index=False)

# Step 3: Transform - Perform transformations directly in SQLite
query = """
CREATE TABLE two AS
SELECT 
    rcd.PassengerId, 
    rcd.Name, 
    rcd.Sex, 
    rcd.Age, 
    rcd.Fare, 
    rjd.name AS employee_name, 
    rjd.email
FROM 
    raw_csv_data rcd
LEFT JOIN 
    raw_json_data rjd
ON 
    rcd.PassengerId % 10 = rjd.id
"""
conn.execute(query)

# Verify the transformed data in the data warehouse
print("\nTransformed Data (ELT):")
print(pd.read_sql('SELECT * FROM two LIMIT 5', conn))

# Close connection
conn.close()


API Data:
       base  visibility          dt  timezone       id    name  cod  \
0  stations       10000  1741763551         0  2643743  London  200   

   coord_lon  coord_lat  main_temp  ...  sys_type   sys_id  sys_country  \
0    -0.1257    51.5085     277.06  ...         2  2091269           GB   

   sys_sunrise  sys_sunset  weather_unique_id  weather_id  weather_main  \
0   1741760480  1741802358                  0         801        Clouds   

   weather_description  weather_icon  
0           few clouds           02d  

[1 rows x 31 columns]

Transformed Data (ELT):
   PassengerId                                               Name     Sex  \
0            1                            Braund, Mr. Owen Harris    male   
1            2  Cumings, Mrs. John Bradley (Florence Briggs Th...  female   
2            3                             Heikkinen, Miss. Laina  female   
3            4       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female   
4            5                    

In [13]:
import pandas as pd
import sqlite3
import requests

# Step 1: Extract - Load data from different file types and API
# CSV Data
csv_url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df_csv = pd.read_csv(csv_url)
print (df_csv)

     PassengerId  Survived  Pclass  \
0              1         0       3   
1              2         1       1   
2              3         1       3   
3              4         1       1   
4              5         0       3   
..           ...       ...     ...   
886          887         0       2   
887          888         1       1   
888          889         0       3   
889          890         1       1   
890          891         0       3   

                                                  Name     Sex   Age  SibSp  \
0                              Braund, Mr. Owen Harris    male  22.0      1   
1    Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                               Heikkinen, Miss. Laina  female  26.0      0   
3         Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                             Allen, Mr. William Henry    male  35.0      0   
..                                                 ...     ...   ... 

In [15]:
json_url = "https://jsonplaceholder.typicode.com/users"
df_json = pd.read_json(json_url)
df_json

Unnamed: 0,id,name,username,email,address,phone,website,company
0,1,Leanne Graham,Bret,Sincere@april.biz,"{'street': 'Kulas Light', 'suite': 'Apt. 556',...",1-770-736-8031 x56442,hildegard.org,"{'name': 'Romaguera-Crona', 'catchPhrase': 'Mu..."
1,2,Ervin Howell,Antonette,Shanna@melissa.tv,"{'street': 'Victor Plains', 'suite': 'Suite 87...",010-692-6593 x09125,anastasia.net,"{'name': 'Deckow-Crist', 'catchPhrase': 'Proac..."
2,3,Clementine Bauch,Samantha,Nathan@yesenia.net,"{'street': 'Douglas Extension', 'suite': 'Suit...",1-463-123-4447,ramiro.info,"{'name': 'Romaguera-Jacobson', 'catchPhrase': ..."
3,4,Patricia Lebsack,Karianne,Julianne.OConner@kory.org,"{'street': 'Hoeger Mall', 'suite': 'Apt. 692',...",493-170-9623 x156,kale.biz,"{'name': 'Robel-Corkery', 'catchPhrase': 'Mult..."
4,5,Chelsey Dietrich,Kamren,Lucio_Hettinger@annie.ca,"{'street': 'Skiles Walks', 'suite': 'Suite 351...",(254)954-1289,demarco.info,"{'name': 'Keebler LLC', 'catchPhrase': 'User-c..."
5,6,Mrs. Dennis Schulist,Leopoldo_Corkery,Karley_Dach@jasper.info,"{'street': 'Norberto Crossing', 'suite': 'Apt....",1-477-935-8478 x6430,ola.org,"{'name': 'Considine-Lockman', 'catchPhrase': '..."
6,7,Kurtis Weissnat,Elwyn.Skiles,Telly.Hoeger@billy.biz,"{'street': 'Rex Trail', 'suite': 'Suite 280', ...",210.067.6132,elvis.io,"{'name': 'Johns Group', 'catchPhrase': 'Config..."
7,8,Nicholas Runolfsdottir V,Maxime_Nienow,Sherwood@rosamond.me,"{'street': 'Ellsworth Summit', 'suite': 'Suite...",586.493.6943 x140,jacynthe.com,"{'name': 'Abernathy Group', 'catchPhrase': 'Im..."
8,9,Glenna Reichert,Delphine,Chaim_McDermott@dana.io,"{'street': 'Dayna Park', 'suite': 'Suite 449',...",(775)976-6794 x41206,conrad.com,"{'name': 'Yost and Sons', 'catchPhrase': 'Swit..."
9,10,Clementina DuBuque,Moriah.Stanton,Rey.Padberg@karina.biz,"{'street': 'Kattie Turnpike', 'suite': 'Suite ...",024-648-3804,ambrose.net,"{'name': 'Hoeger LLC', 'catchPhrase': 'Central..."


In [17]:
json_url = "https://jsonplaceholder.typicode.com/users"
df_json = pd.read_json(json_url)
df_json


Unnamed: 0,id,name,username,email,address,phone,website,company
0,1,Leanne Graham,Bret,Sincere@april.biz,"{'street': 'Kulas Light', 'suite': 'Apt. 556',...",1-770-736-8031 x56442,hildegard.org,"{'name': 'Romaguera-Crona', 'catchPhrase': 'Mu..."
1,2,Ervin Howell,Antonette,Shanna@melissa.tv,"{'street': 'Victor Plains', 'suite': 'Suite 87...",010-692-6593 x09125,anastasia.net,"{'name': 'Deckow-Crist', 'catchPhrase': 'Proac..."
2,3,Clementine Bauch,Samantha,Nathan@yesenia.net,"{'street': 'Douglas Extension', 'suite': 'Suit...",1-463-123-4447,ramiro.info,"{'name': 'Romaguera-Jacobson', 'catchPhrase': ..."
3,4,Patricia Lebsack,Karianne,Julianne.OConner@kory.org,"{'street': 'Hoeger Mall', 'suite': 'Apt. 692',...",493-170-9623 x156,kale.biz,"{'name': 'Robel-Corkery', 'catchPhrase': 'Mult..."
4,5,Chelsey Dietrich,Kamren,Lucio_Hettinger@annie.ca,"{'street': 'Skiles Walks', 'suite': 'Suite 351...",(254)954-1289,demarco.info,"{'name': 'Keebler LLC', 'catchPhrase': 'User-c..."
5,6,Mrs. Dennis Schulist,Leopoldo_Corkery,Karley_Dach@jasper.info,"{'street': 'Norberto Crossing', 'suite': 'Apt....",1-477-935-8478 x6430,ola.org,"{'name': 'Considine-Lockman', 'catchPhrase': '..."
6,7,Kurtis Weissnat,Elwyn.Skiles,Telly.Hoeger@billy.biz,"{'street': 'Rex Trail', 'suite': 'Suite 280', ...",210.067.6132,elvis.io,"{'name': 'Johns Group', 'catchPhrase': 'Config..."
7,8,Nicholas Runolfsdottir V,Maxime_Nienow,Sherwood@rosamond.me,"{'street': 'Ellsworth Summit', 'suite': 'Suite...",586.493.6943 x140,jacynthe.com,"{'name': 'Abernathy Group', 'catchPhrase': 'Im..."
8,9,Glenna Reichert,Delphine,Chaim_McDermott@dana.io,"{'street': 'Dayna Park', 'suite': 'Suite 449',...",(775)976-6794 x41206,conrad.com,"{'name': 'Yost and Sons', 'catchPhrase': 'Swit..."
9,10,Clementina DuBuque,Moriah.Stanton,Rey.Padberg@karina.biz,"{'street': 'Kattie Turnpike', 'suite': 'Suite ...",024-648-3804,ambrose.net,"{'name': 'Hoeger LLC', 'catchPhrase': 'Central..."


In [19]:
import sqlite3

# Connect to the SQLite database (replace 'your_database.db' with your database file name)
conn = sqlite3.connect('data_warehouse.db')

# Create a cursor object
cursor = conn.cursor()

# Execute the SQL query to get all table names
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")

# Fetch and print all table names
tables = cursor.fetchall()
for table in tables:
    print(table[0])

# Close the connection
conn.close()


elt_transformed_data
mining
transformed_data_from
new
one
api_url
transformed_data
raw_csv_data
raw_json_data
raw_api_data
two


In [21]:
import pandas as pd
import sqlite3
import requests

# Step 1: Extract - Load data from different file types and API
# CSV Data
csv_url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df_csv = pd.read_csv(csv_url)

# JSON Data
json_url = "https://jsonplaceholder.typicode.com/users"
df_json = pd.read_json(json_url)

# API Data (Weather Data)
api_key = "c6d622616af71712b32b3d31695efb70"  # Default API key
city = "London"  # Replace with your desired city
api_url = f"http://api.openweathermap.org/data/2.5/weather?q={city}&appid={api_key}"

try:
    response = requests.get(api_url)
    response.raise_for_status()  # Raise an exception for HTTP errors
    api_data = response.json()
    # Normalize and flatten the JSON data
    df_api = pd.json_normalize(api_data)
    print("API Data:")
    print(df_api.head())
except requests.exceptions.HTTPError as http_err:
    print(f"HTTP error occurred: {http_err}")
except Exception as err:
    print(f"An error occurred: {err}")

# Step 2: Transform - Data Cleaning and Merging
# Clean CSV data
df_csv_cleaned = df_csv[['PassengerId', 'Name', 'Sex', 'Age', 'Fare']].dropna()

# Clean JSON data
df_json_cleaned = df_json[['id', 'name', 'email']].rename(columns={'name': 'employee_name'})

# Transform API data to have a mock 'id' column and select relevant columns
df_api_cleaned = df_api[['weather', 'main.temp', 'main.humidity']].rename(columns={'main.temp': 'temperature', 'main.humidity': 'humidity'})
df_api_cleaned['id'] = df_csv_cleaned['PassengerId'] % 10  # Mock 'id' for merging

# Remove lists from 'weather' column
df_api_cleaned['weather'] = df_api_cleaned['weather'].apply(lambda x: x[0]['description'] if isinstance(x, list) else x)

# Merge datasets for demonstration
df_csv_cleaned['id'] = df_csv_cleaned['PassengerId'] % 10  # Mock 'id' for merging
df_merged = pd.merge(df_csv_cleaned, df_json_cleaned, on='id', how='inner')
df_final = pd.merge(df_merged, df_api_cleaned, on='id', how='inner')

# Step 3: Load - Create and Insert into SQLite Database
conn = sqlite3.connect('data_warehouse.db')

# Load transformed data into SQLite
df_final.to_sql('final_data', conn, if_exists='replace', index=False)

# Verify the data in the data warehouse
print("\nFinal Merged Data (ETL):")
print(pd.read_sql('SELECT * FROM final_data LIMIT 5', conn))


API Data:
                                             weather      base  visibility  \
0  [{'id': 801, 'main': 'Clouds', 'description': ...  stations       10000   

           dt  timezone       id    name  cod  coord.lon  coord.lat  ...  \
0  1741763551         0  2643743  London  200    -0.1257    51.5085  ...   

   main.grnd_level  wind.speed  wind.deg  wind.gust  clouds.all  sys.type  \
0             1000        0.89       355       2.24          18         2   

    sys.id  sys.country  sys.sunrise  sys.sunset  
0  2091269           GB   1741760480  1741802358  

[1 rows x 27 columns]

Final Merged Data (ETL):
   PassengerId                                            Name     Sex   Age  \
0            1                         Braund, Mr. Owen Harris    male  22.0   
1           11                 Sandstrom, Miss. Marguerite Rut  female   4.0   
2           21                            Fynney, Mr. Joseph J    male  35.0   
3           31                        Uruchurtu, Don. 