# MOUNT KE GDRIVE

In [1]:
# Make Google Drive reachable from the Colab runtime under /content/drive.
from google.colab import drive

drive.mount("/content/drive")


Mounted at /content/drive


# DEFINISI PATH PROYEK

In [2]:
# Root folder of the project (located under the Drive mount point)
PROJECT_ROOT = "/content/drive/MyDrive/bigdata_final_project"

# Sub-folders of the project root
RAW_PATH = PROJECT_ROOT + "/raw"
WAREHOUSE_PATH = PROJECT_ROOT + "/warehouse"
ELT_PATH = PROJECT_ROOT + "/elt_pipeline"

# SQLite database file used as the data warehouse
DB_PATH = WAREHOUSE_PATH + "/bigdata_warehouse.db"


# LIBRARY

In [3]:
import sqlite3
import pandas as pd
import requests
import json
import os

# KONFIGURASI WAKTU DAN KOORDINAT

In [None]:
def month_date_range(year, month):
    """Return the first and last day of a calendar month as ISO date strings.

    Args:
        year: Four-digit year as a string, e.g. "2025".
        month: Two-digit month as a string, e.g. "01".

    Returns:
        A ``(start_date, end_date)`` tuple of "YYYY-MM-DD" strings. The end
        date uses the real number of days in the month, so February and
        30-day months never produce an invalid date such as "2025-02-31".
    """
    days_in_month = pd.Period(f"{year}-{month}", freq="M").days_in_month
    return f"{year}-{month}-01", f"{year}-{month}-{days_in_month:02d}"


# Month to extract; START_DATE / END_DATE are derived from these two values.
YEAR = "2025"
MONTH = "01"
START_DATE, END_DATE = month_date_range(YEAR, MONTH)


# Coordinates of New York City
lat = 40.7128
long = -74.0060


# EXTRACT DATA CUACA


In [None]:
# Request URL for the Open-Meteo archive endpoint: hourly temperature, rain and
# snowfall for the configured coordinates and date range, with the timezone
# parameter set to America/New_York.
api_url = (
    f"https://archive-api.open-meteo.com/v1/archive?"
    f"latitude={lat}&longitude={long}&"
    f"start_date={START_DATE}&end_date={END_DATE}&"
    f"hourly=temperature_2m,rain,snowfall&timezone=America%2FNew_York"
)

try:
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(api_url, timeout=30)
    response.raise_for_status()  # turn HTTP 4xx/5xx responses into an exception
    data = response.json()

    # The 'hourly' entry of the response is loaded as the weather table.
    df_weather = pd.DataFrame(data['hourly'])

    # Parse the 'time' column into pandas datetimes.
    df_weather['time'] = pd.to_datetime(df_weather['time'])

    # Keep a raw CSV copy of the extract in the raw folder.
    output_csv = f"{RAW_PATH}/nyc_weather_{START_DATE}_{END_DATE}.csv"

    df_weather.to_csv(output_csv, index=False)

    print(f"✅ Berhasil! Data disimpan ke: {output_csv}")
    print(f"📊 Total data: {len(df_weather)} baris")

    # Show a preview of the result
    print("\nPreview 5 data teratas:")
    display(df_weather.head())  # 'display' is provided by Colab/Jupyter

except Exception as e:
    print(f"❌ Terjadi Error: {e}")
    # Re-raise so a failed extract stops the run; otherwise the load cells
    # below would use a stale or undefined df_weather.
    raise

✅ Berhasil! Data disimpan ke: /content/drive/MyDrive/bigdata_final_project/raw/nyc_weather_2025-01-01_2025-01-31.csv
📊 Total data: 744 baris

Preview 5 data teratas:


Unnamed: 0,time,temperature_2m,rain,snowfall
0,2025-01-01 00:00:00,9.1,0.9,0.0
1,2025-01-01 01:00:00,8.6,0.0,0.0
2,2025-01-01 02:00:00,8.9,0.0,0.0
3,2025-01-01 03:00:00,8.8,0.0,0.0
4,2025-01-01 04:00:00,8.3,0.0,0.0


# CONNECT KE DATA WAREHOUSE

In [8]:
# Open a connection to the SQLite warehouse database at DB_PATH
conn = sqlite3.connect(database=DB_PATH)


# LOAD DATA KE RAW TABLE (ELT)

In [12]:
# Write the weather frame to the raw_weather table, replacing the table if it
# already exists. reset_index() turns the frame's index into a regular column,
# and index=False stops pandas from writing the index a second time.
df_weather.reset_index().to_sql(name="raw_weather", con=conn, index=False, if_exists="replace")


744

# VERIFIKASI SQL

In [13]:
# List the names of all tables recorded in sqlite_master
pd.read_sql(sql="SELECT name FROM sqlite_master WHERE type='table';", con=conn)


Unnamed: 0,name
0,raw_taxi
1,raw_weather_hourly
2,raw_weather


In [11]:
# Inspect the data: first five rows of the raw_weather table
pd.read_sql(sql="SELECT * FROM raw_weather LIMIT 5;", con=conn)


Unnamed: 0,index,time,temperature_2m,rain,snowfall
0,0,2025-01-01 00:00:00,9.1,0.9,0.0
1,1,2025-01-01 01:00:00,8.6,0.0,0.0
2,2,2025-01-01 02:00:00,8.9,0.0,0.0
3,3,2025-01-01 03:00:00,8.8,0.0,0.0
4,4,2025-01-01 04:00:00,8.3,0.0,0.0


# VERIFIKASI FILE FISIK

In [14]:
!ls "/content/drive/MyDrive/bigdata_final_project/warehouse"


bigdata_warehouse.db  transport_dw.db


# LOGGING

In [17]:
# Summary log of the raw weather load. The source is the Open-Meteo archive API
# called in the extract cell, and the period is taken from START_DATE/END_DATE
# so the log cannot drift from the configured date range.
print("=== LOG ELT – WEATHER RAW ===")
print("Sumber API    : Open-Meteo")
print("Lokasi        : New York")
print(f"Periode       : {START_DATE} s.d. {END_DATE}")
print("Granularitas  : Hourly")
print("Jumlah Baris  :", df_weather.shape[0])
print("Jumlah Kolom  :", df_weather.shape[1])


=== LOG ELT – WEATHER RAW (HOURLY) ===
Sumber API    : Meteostat
Lokasi        : New York
Periode       : 1-1-2025 s.d. 31-1-2025
Granularitas  : Hourly
Jumlah Baris  : 744
Jumlah Kolom  : 4
