In [None]:
# Install pymongo and pandas if needed
# !pip install pymongo pandas

## 2. Import Libraries and Connect to MongoDB


In [1]:
# Importing required libraries
import os  # Standard library: read configuration from environment variables

import pandas as pd # Import pandas library
from pymongo import MongoClient # Import MongoDB client

# Connect to MongoDB
# The connection string embeds a username and password, so it is read from the
# MONGODB_URI environment variable instead of being hardcoded in the notebook.
# NOTE(review): credentials that were previously committed here should be rotated.
mongodb_uri = os.environ.get("MONGODB_URI")
if not mongodb_uri:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to your MongoDB connection string "
        "(for example: mongodb+srv://<user>:<password>@<cluster-host>/)."
    )

client = MongoClient(mongodb_uri) # Connect to MongoDB Atlas
db = client["GlobalWeatherDB"] # Handle to the database used by this notebook
collection = db["GlobalWeatherData"] # Handle to the collection holding the weather records


## 3. Import Data from CSV to MongoDB
This cell reads the CSV file into a pandas DataFrame, converts it to a list of dictionaries (one per row), and inserts those dictionaries into the MongoDB collection.

In [2]:
# Load CSV data
data = pd.read_csv(
    r'/LABS/Programming for Artificial Intelligence (MSCAI1) LAB/CA_Cem-Koyluoglu/app/GlobalWeatherRepository.csv')
data_dict = data.to_dict("records")  # Convert DataFrame to a list of dictionaries, one per row

# Insert data into MongoDB.
# Skip the insert when the collection already holds documents, so that
# re-running this cell does not duplicate every record.
existing_count = collection.count_documents({})
if existing_count > 0:
    print(f"Collection already contains {existing_count} documents; skipping insert")
elif not data_dict:
    # insert_many() rejects an empty list of documents, so report it instead of crashing.
    print("CSV file contains no rows; nothing to insert")
else:
    insert_result = collection.insert_many(data_dict)
    # Verify against the documents written by this call rather than the total
    # collection size, which is only equal when the collection started empty.
    if len(insert_result.inserted_ids) == len(data_dict):
        print("Data inserted successfully")
    else:
        print("Data insertion failed")

Data inserted successfully


## Step 3: Convert last_updated Field to Date Format
At this point, the `last_updated` field of the documents imported into MongoDB is stored as text (a string). We convert this field to a date type so that we can query the data month by month.

In [33]:
# Convert `last_updated` field to date format
# Only documents whose `last_updated` is still a string are matched, so re-running
# this cell does not rewrite already-converted documents, and documents without
# the field are left untouched instead of receiving a null `last_updated`.
collection.update_many(
    {"last_updated": {"$type": "string"}},  # Update only documents still holding the string form
    [{"$set": {"last_updated": {"$toDate": "$last_updated"}}}] # aggregation pipeline
)

print("Converted `last_updated` field to ISODate format.")


Converted `last_updated` field to ISODate format.


## Step 4: Verify Data Structure
I check the data structure to make sure everything is working correctly.
If the `last_updated` field appears as `datetime.datetime(year, month, day, hour, minute)`, for example `datetime.datetime(2024, 5, 16, 13, 15)`, the conversion was successful.


In [34]:
# Fetch a single document from the collection and print it to inspect its fields
sample_document = collection.find_one()
print(sample_document)


{'_id': ObjectId('671f86bc3bd5a23bb3a2d111'), 'country': 'Afghanistan', 'location_name': 'Kabul', 'latitude': 34.52, 'longitude': 69.18, 'timezone': 'Asia/Kabul', 'last_updated_epoch': 1715849100, 'last_updated': datetime.datetime(2024, 5, 16, 13, 15), 'temperature_celsius': 26.6, 'temperature_fahrenheit': 79.8, 'condition_text': 'Partly Cloudy', 'wind_mph': 8.3, 'wind_kph': 13.3, 'wind_degree': 338, 'wind_direction': 'NNW', 'pressure_mb': 1012.0, 'pressure_in': 29.89, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 24, 'cloud': 30, 'feels_like_celsius': 25.3, 'feels_like_fahrenheit': 77.5, 'visibility_km': 10.0, 'visibility_miles': 6.0, 'uv_index': 7.0, 'gust_mph': 9.5, 'gust_kph': 15.3, 'air_quality_Carbon_Monoxide': 277.0, 'air_quality_Ozone': 103.0, 'air_quality_Nitrogen_dioxide': 1.1, 'air_quality_Sulphur_dioxide': 0.2, 'air_quality_PM2.5': 8.4, 'air_quality_PM10': 26.6, 'air_quality_us-epa-index': 1, 'air_quality_gb-defra-index': 1, 'sunrise': '04:50 AM', 'sunset': '06:50 PM', 'm

## Step 5: Filter Records by Month
MongoDB's `$month` operator lets us query by month now that the `last_updated` field is stored as a date. To get the data for July, for instance:

In [36]:
# Helper function to get the records for a given month
def get_records_by_month(month):
    """Return all documents whose `last_updated` date falls in the given month.

    Parameters
    ----------
    month : int
        Month number from 1 to 12. Only the month is compared, so matching
        documents from any year are returned.

    Returns
    -------
    pandas.DataFrame
        One row per matching document; empty when nothing matches.

    Raises
    ------
    ValueError
        If `month` is not an integer between 1 and 12. Without this check an
        invalid value would silently produce an empty result.
    """
    if isinstance(month, bool) or not isinstance(month, int) or not 1 <= month <= 12:
        raise ValueError(f"month must be an integer between 1 and 12, got {month!r}")

    # `$month` extracts the month number from the stored date; `$expr` lets that
    # aggregation expression be used inside a find() filter.
    month_filter = {"$expr": {"$eq": [{"$month": "$last_updated"}, month]}}
    results = list(collection.find(month_filter))
    return pd.DataFrame(results)  # Convert the results to DataFrame format

# Example: display every record whose month is July (month number 7)
get_records_by_month(month=7)

Unnamed: 0,_id,country,location_name,latitude,longitude,timezone,last_updated_epoch,last_updated,temperature_celsius,temperature_fahrenheit,...,air_quality_PM2.5,air_quality_PM10,air_quality_us-epa-index,air_quality_gb-defra-index,sunrise,sunset,moonrise,moonset,moon_phase,moon_illumination
0,671f86bc3bd5a23bb3a2f423,Fiji Islands,Suva,-18.13,178.42,Pacific/Fiji,1719754200,2024-07-01 01:30:00,22.0,71.6,...,2.3,10.9,1,1,06:38 AM,05:42 PM,12:43 AM,12:43 PM,Waning Crescent,38
1,671f86bc3bd5a23bb3a2f442,Kiribati,Tarawa,-0.88,169.53,Pacific/Tarawa,1719754200,2024-07-01 01:30:00,28.5,83.3,...,0.8,1.9,1,1,06:43 AM,06:48 PM,01:11 AM,01:33 PM,Waning Crescent,38
2,671f86bc3bd5a23bb3a2f454,Marshall Islands,Majuro,7.10,171.38,Pacific/Majuro,1719754200,2024-07-01 01:30:00,29.2,84.6,...,0.7,1.8,1,1,06:22 AM,06:54 PM,12:59 AM,01:32 PM,Waning Crescent,38
3,671f86bc3bd5a23bb3a2f458,Micronesia,Palikir,6.92,158.15,Pacific/Pohnpei,1719754200,2024-07-01 00:30:00,28.4,83.1,...,0.5,1.4,1,1,06:15 AM,06:47 PM,12:54 AM,01:26 PM,Waning Crescent,38
4,671f86bc3bd5a23bb3a2f464,New Zealand,Wellington,-41.30,174.78,Pacific/Auckland,1719754200,2024-07-01 01:30:00,13.4,56.1,...,0.8,3.4,1,1,07:48 AM,05:02 PM,01:15 AM,12:35 PM,Waning Crescent,38
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5576,671f86bc3bd5a23bb3a30a71,Venezuela,Caracas,10.50,-66.92,America/Caracas,1722428100,2024-07-31 08:15:00,21.6,70.9,...,12.7,13.3,1,2,06:16 AM,06:51 PM,02:37 AM,03:52 PM,Waning Crescent,20
5577,671f86bc3bd5a23bb3a30a72,Vietnam,Hanoi,21.03,105.85,Asia/Bangkok,1722428100,2024-07-31 19:15:00,27.0,80.5,...,40.2,47.8,2,4,05:30 AM,06:35 PM,01:15 AM,03:18 PM,Waning Crescent,20
5578,671f86bc3bd5a23bb3a30a73,Yemen,Sanaa,15.35,44.21,Asia/Aden,1722429000,2024-07-31 15:30:00,22.3,72.2,...,19.4,68.6,2,2,05:46 AM,06:34 PM,01:44 AM,03:21 PM,Waning Crescent,20
5579,671f86bc3bd5a23bb3a30a74,Zambia,Lusaka,-15.42,28.28,Africa/Lusaka,1722429000,2024-07-31 14:30:00,28.9,84.0,...,25.2,30.9,2,3,06:30 AM,05:56 PM,02:58 AM,02:16 PM,Waning Crescent,20


## Step 6: Test Query for Another Month
To query for another month, change the month parameter. For example, to get data for May:

In [None]:
# Display every record whose month is May (month number 5)
get_records_by_month(month=5)

## Extra: Get Top 3 Hottest Locations
To see the top 3 locations with the highest temperatures:

In [37]:
# Function to return the records with the highest temperature (3 by default)
def get_top_3_hottest_locations(limit=3):
    """Return the records with the highest `temperature_celsius`, hottest first.

    Each row is one stored record, so the same location can appear more than
    once if several of its records rank among the hottest.

    Parameters
    ----------
    limit : int, optional
        Number of records to return. Defaults to 3.

    Returns
    -------
    pandas.DataFrame
        Up to `limit` rows sorted by `temperature_celsius` in descending order.

    Raises
    ------
    ValueError
        If `limit` is not a positive integer. MongoDB treats a limit of 0 as
        "no limit", which would return the whole collection.
    """
    if isinstance(limit, bool) or not isinstance(limit, int) or limit < 1:
        raise ValueError(f"limit must be a positive integer, got {limit!r}")

    hottest_cursor = collection.find().sort("temperature_celsius", -1).limit(limit)
    return pd.DataFrame(list(hottest_cursor))  # return results in DataFrame format

# Display the 3 records with the highest temperature_celsius values
hottest_records = get_top_3_hottest_locations()
hottest_records

Unnamed: 0,_id,country,location_name,latitude,longitude,timezone,last_updated_epoch,last_updated,temperature_celsius,temperature_fahrenheit,...,air_quality_PM2.5,air_quality_PM10,air_quality_us-epa-index,air_quality_gb-defra-index,sunrise,sunset,moonrise,moonset,moon_phase,moon_illumination
0,671f86bc3bd5a23bb3a2ec01,Kuwait,Kuwait City,29.37,47.96,Asia/Kuwait,1718804700,2024-06-19 16:45:00,49.2,120.6,...,33.6,187.0,2,3,04:49 AM,06:50 PM,04:33 PM,02:17 AM,Waxing Gibbous,90
1,671f86bc3bd5a23bb3a2fc9a,Iraq,Baghdad,33.34,44.39,Asia/Baghdad,1720701900,2024-07-11 15:45:00,49.1,120.3,...,12.3,39.5,1,2,05:02 AM,07:14 PM,09:58 AM,10:37 PM,Waxing Crescent,23
2,671f86bc3bd5a23bb3a2ee35,Iraq,Baghdad,33.34,44.39,Asia/Baghdad,1719063900,2024-06-22 16:45:00,49.1,120.4,...,16.1,47.9,2,2,04:54 AM,07:16 PM,08:06 PM,04:42 AM,Full Moon,100


## Extra: Get Top 3 Days with Highest Precipitation
To see the top 3 records (each a location at a specific date and time) with the highest rainfall, measured by `precip_mm`:

In [38]:
# Function to return the records with the highest rainfall (3 by default)
def get_top_3_highest_precipitation(limit=3):
    """Return the records with the highest `precip_mm`, wettest first.

    Each row is one stored record (a location at a specific time), not a
    per-day aggregate.

    Parameters
    ----------
    limit : int, optional
        Number of records to return. Defaults to 3.

    Returns
    -------
    pandas.DataFrame
        Up to `limit` rows sorted by `precip_mm` in descending order.

    Raises
    ------
    ValueError
        If `limit` is not a positive integer. MongoDB treats a limit of 0 as
        "no limit", which would return the whole collection.
    """
    if isinstance(limit, bool) or not isinstance(limit, int) or limit < 1:
        raise ValueError(f"limit must be a positive integer, got {limit!r}")

    wettest_cursor = collection.find().sort("precip_mm", -1).limit(limit)
    return pd.DataFrame(list(wettest_cursor))  # Return results in DataFrame format

# Display the 3 records with the highest precip_mm values
wettest_records = get_top_3_highest_precipitation()
wettest_records

Unnamed: 0,_id,country,location_name,latitude,longitude,timezone,last_updated_epoch,last_updated,temperature_celsius,temperature_fahrenheit,...,air_quality_PM2.5,air_quality_PM10,air_quality_us-epa-index,air_quality_gb-defra-index,sunrise,sunset,moonrise,moonset,moon_phase,moon_illumination
0,671f86bc3bd5a23bb3a32763,Vietnam,Hanoi,21.03,105.85,Asia/Bangkok,1725710400,2024-09-07 19:00:00,26.2,79.2,...,9.435,13.505,1,1,05:42 AM,06:07 PM,08:53 AM,08:27 PM,Waxing Crescent,13
1,671f86bc3bd5a23bb3a347b8,Jamaica,Port Royal,17.9333,-76.85,America/Jamaica,1729416600,2024-10-20 04:30:00,27.1,80.8,...,8.51,12.95,1,1,06:02 AM,05:42 PM,08:33 PM,09:21 AM,Waning Gibbous,91
2,671f86bc3bd5a23bb3a31cf8,Fiji Islands,Suva,-18.13,178.42,Pacific/Fiji,1724587200,2024-08-26 00:00:00,20.3,68.5,...,1.0,1.3,1,1,06:19 AM,05:58 PM,11:30 PM,10:04 AM,Waning Gibbous,65
