In [25]:
import json
import pandas as pd
from datetime import datetime

In [26]:
# Load the raw weather records from disk. The encoding is given explicitly
# because open() otherwise uses the platform's locale encoding, which may not
# be UTF-8 and could fail or mis-decode non-ASCII text in the file.
with open("weather_data.json", "r", encoding="utf-8") as f:
    weather_data = json.load(f)

print(f"Total records loaded: {len(weather_data)}")

# Build a DataFrame from the loaded records and preview the first rows.
df = pd.DataFrame(weather_data)
df.head()

Total records loaded: 9600


Unnamed: 0,record_id,datetime_in_utc,city_id,sunrise_in_utc,sunset_in_utc,weather_icon,weather_description,snow_1h,rain_1h,visibility,temperature,feels_like,cloud,humidity,pressure,wind_deg,wind_speed,timezone_offset
0,1,2025-11-30 21:13:52.086664,Berlin,2025-11-30 06:53:15,2025-11-30 14:57:16,04n,overcast clouds,,,10000,7.18,5.35,100,94,1014,197,2.68,3600
1,2,2025-11-30 21:13:53.228624,Hamburg,2025-11-30 07:11:59,2025-11-30 15:05:49,02n,few clouds,,,10000,4.91,0.51,20,78,1012,230,6.69,3600
2,3,2025-11-30 21:13:54.320215,Munich,2025-11-30 06:41:54,2025-11-30 15:23:15,10n,moderate rain,,1.26,10000,5.32,5.32,100,93,1017,278,0.89,3600
3,4,2025-11-30 21:13:55.409954,Colonge,2025-11-30 07:11:49,2025-11-30 15:30:16,04n,overcast clouds,,,10000,5.14,2.61,100,87,1018,240,3.09,3600
4,5,2025-11-30 21:13:56.493793,Frankfurt_am_Main,2025-11-30 07:01:23,2025-11-30 15:26:56,03n,scattered clouds,,,10000,5.66,2.6,40,90,1017,250,4.12,3600


In [27]:
# Remove the record_id column before further processing.
# errors='ignore' keeps this cell re-runnable: without it, executing the cell a
# second time raises KeyError because the column has already been dropped.
df = df.drop(columns=['record_id'], errors='ignore')

In [28]:
# Parse the three timestamp columns into pandas datetime values.
# NOTE(review): the column names suggest the values are UTC, but no timezone is
# attached here -- confirm that naive timestamps are what downstream code expects.
for timestamp_column in ('datetime_in_utc', 'sunrise_in_utc', 'sunset_in_utc'):
    df[timestamp_column] = pd.to_datetime(df[timestamp_column])

In [29]:
# Columns to convert to numeric dtypes. With errors='coerce', any value that
# cannot be parsed as a number becomes NaN instead of raising.
numeric_fields = [
    'snow_1h', 'rain_1h', 'visibility', 'temperature', 'feels_like', 'cloud',
    'humidity', 'pressure', 'wind_deg', 'wind_speed', 'timezone_offset',
]
for column_name in numeric_fields:
    df[column_name] = pd.to_numeric(df[column_name], errors='coerce')

In [30]:
# List each column's dtype to check the result of the datetime and numeric conversions.
print(df.dtypes)

datetime_in_utc        datetime64[ns]
city_id                        object
sunrise_in_utc         datetime64[ns]
sunset_in_utc          datetime64[ns]
weather_icon                   object
weather_description            object
snow_1h                       float64
rain_1h                       float64
visibility                    float64
temperature                   float64
feels_like                    float64
cloud                           int64
humidity                        int64
pressure                        int64
wind_deg                        int64
wind_speed                    float64
timezone_offset                 int64
dtype: object


In [31]:
# Preview the first five rows of the converted DataFrame.
df.head()

Unnamed: 0,datetime_in_utc,city_id,sunrise_in_utc,sunset_in_utc,weather_icon,weather_description,snow_1h,rain_1h,visibility,temperature,feels_like,cloud,humidity,pressure,wind_deg,wind_speed,timezone_offset
0,2025-11-30 21:13:52.086664,Berlin,2025-11-30 06:53:15,2025-11-30 14:57:16,04n,overcast clouds,,,10000.0,7.18,5.35,100,94,1014,197,2.68,3600
1,2025-11-30 21:13:53.228624,Hamburg,2025-11-30 07:11:59,2025-11-30 15:05:49,02n,few clouds,,,10000.0,4.91,0.51,20,78,1012,230,6.69,3600
2,2025-11-30 21:13:54.320215,Munich,2025-11-30 06:41:54,2025-11-30 15:23:15,10n,moderate rain,,1.26,10000.0,5.32,5.32,100,93,1017,278,0.89,3600
3,2025-11-30 21:13:55.409954,Colonge,2025-11-30 07:11:49,2025-11-30 15:30:16,04n,overcast clouds,,,10000.0,5.14,2.61,100,87,1018,240,3.09,3600
4,2025-11-30 21:13:56.493793,Frankfurt_am_Main,2025-11-30 07:01:23,2025-11-30 15:26:56,03n,scattered clouds,,,10000.0,5.66,2.6,40,90,1017,250,4.12,3600


In [32]:
# Step 4: Convert DataFrame back to a list of dictionaries for MongoDB insertion.
# Missing values (NaN in numeric columns such as snow_1h / rain_1h, NaT in
# datetime columns) are mapped to None so they are stored as null. Left as-is,
# NaN would be written as a floating-point NaN rather than null, and NaT is not
# a valid datetime value to encode.
records = [
    {key: (None if pd.isna(value) else value) for key, value in row.items()}
    for row in df.to_dict(orient='records')
]

In [None]:
from pymongo import MongoClient
import os

In [34]:
# Connect to MongoDB Atlas
# The connection string is read from the environment so that no credentials
# are stored in the notebook.
mongo_uri = os.getenv("MONGO_URI")
if not mongo_uri:
    # Fail fast: os.getenv returns None when the variable is unset, and
    # MongoClient(None) would silently target the default local host instead
    # of the intended cluster.
    raise RuntimeError("MONGO_URI environment variable is not set")

client = MongoClient(mongo_uri)
db = client['germany_weather_db']           # replace with your DB name
collection = db['weather_data']     # replace with your collection name

# Insert records into MongoDB
# NOTE(review): there is no deduplication key here, so a full re-run of the
# notebook presumably inserts every record again -- confirm whether that is acceptable.
collection.insert_many(records)

print("Data inserted successfully!")

Data inserted successfully!
