In [1]:
import json
import os

import pandas as pd
import requests

In [2]:
# Load the raw weather observations from a local CSV file.

local_file = "/content/weather.csv"

try:
    data = pd.read_csv(local_file)
except FileNotFoundError:
    # Report the missing file with a short message instead of a traceback.
    # NOTE: `data` is left undefined in this case.
    print("Error: The file was not found.")
else:
    # Preview the first rows and the available column labels.
    print(data.head())
    print(data.columns)


   Data.Precipitation   Date.Full  Date.Month  Date.Week of  Date.Year  \
0                0.00  2016-01-03           1             3       2016   
1                0.00  2016-01-03           1             3       2016   
2                0.16  2016-01-03           1             3       2016   
3                0.00  2016-01-03           1             3       2016   
4                0.01  2016-01-03           1             3       2016   

  Station.City Station.Code Station.Location Station.State  \
0   Birmingham          BHM   Birmingham, AL       Alabama   
1   Huntsville          HSV   Huntsville, AL       Alabama   
2       Mobile          MOB       Mobile, AL       Alabama   
3   Montgomery          MGM   Montgomery, AL       Alabama   
4    Anchorage          ANC    Anchorage, AK        Alaska   

   Data.Temperature.Avg Temp  Data.Temperature.Max Temp  \
0                         39                         46   
1                         39                         47   
2    

In [3]:
# Cleaned data: keep the columns of interest, restrict to month 3, then
# average every measurement per state / year / month.
group_keys = ['Station.State', 'Date.Year', 'Date.Month']
measure_cols = ['Data.Precipitation', 'Data.Temperature.Avg Temp',
                'Data.Wind.Speed', 'Data.Wind.Direction']

df = pd.DataFrame(data, columns=group_keys + measure_cols)
df = df[df['Date.Month'].eq(3)]

# Mean of each measurement within a group, with the group keys turned back
# into ordinary columns and the numbers rounded to two decimal places.
grouped_df = (
    df.groupby(group_keys)[measure_cols]
    .mean()
    .reset_index()
    .round(2)
)
print(grouped_df)


     Station.State  Date.Year  Date.Month  Data.Precipitation  \
0          Alabama       2016           3                1.30   
1           Alaska       2016           3                0.36   
2          Arizona       2016           3                0.02   
3         Arkansas       2016           3                1.26   
4       California       2016           3                0.84   
5         Colorado       2016           3                0.17   
6      Connecticut       2016           3                0.39   
7               DE       2016           3                0.34   
8         Delaware       2016           3                0.34   
9          Florida       2016           3                0.61   
10         Georgia       2016           3                0.72   
11          Hawaii       2016           3                0.53   
12           Idaho       2016           3                0.51   
13        Illinois       2016           3                0.63   
14         Indiana       

In [4]:
# Write the aggregated frame to disk; index=False leaves the row index out of the file.
grouped_df.to_csv("2016_cleaned_data.csv", index=False)


In [5]:
# calling/creating API

import requests

def get_current_weather(api_key, location, timeout=10):
    """Fetch the current conditions for a location from the WeatherAPI service.

    Args:
        api_key: API key, sent as the ``key`` query parameter.
        location: Free-text place query, sent as the ``q`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        A dict with the keys ``Location``, ``Time``, ``Precipitation (mm)``,
        ``Temperature (°F)``, ``Wind Speed (mph)`` and ``Wind Direction``,
        or ``None`` when the request fails or the body is not valid JSON.
        Fields absent from the response are reported as ``None``.

    Note:
        ``Location`` is the ``region`` of the place the service matched, not
        the query string, so the two can differ.
    """
    # HTTPS keeps the API key (part of the query string) from travelling in
    # plaintext.
    url = "https://api.weatherapi.com/v1/current.json"
    params = {"key": api_key, "q": location}

    try:
        response = requests.get(url, params=params, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions handled below.
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        print(f"Error fetching weather data: {e}")
        return None

    current = payload.get("current", {})
    return {
        "Location": payload.get("location", {}).get("region"),
        "Time": current.get("last_updated"),
        "Precipitation (mm)": current.get("precip_mm"),
        "Temperature (°F)": current.get("temp_f"),
        "Wind Speed (mph)": current.get("wind_mph"),
        "Wind Direction": current.get("wind_dir"),
    }

# Usage
# Read the key from the environment so the credential is not stored in the
# notebook. NOTE(review): the key that was previously committed here should
# be treated as exposed and revoked.
api_key = os.environ.get("WEATHERAPI_KEY")
if not api_key:
    raise RuntimeError("Set the WEATHERAPI_KEY environment variable to your WeatherAPI key.")
location = "Alabama"

# Stored under its own name so the raw DataFrame held in `data` is not
# replaced by this dict.
current_weather = get_current_weather(api_key, location)
print(current_weather)


{'Location': 'New York', 'Time': '2025-05-09 21:15', 'Precipitation (mm)': 0.0, 'Temperature (°F)': 50.2, 'Wind Speed (mph)': 2.2, 'Wind Direction': 'ENE'}


In [6]:
#cleaned API
# Query the current weather for every U.S. state and save the results as CSV.
country = 'USA United States of America'

# all 50 U.S. states
states = [
    "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", "Delaware",
    "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky",
    "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi",
    "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico",
    "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
    "Rhode Island", "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", "Vermont",
    "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming"
]

# NOTE(review): each record's "Location" is the region the API matched for the
# query, which is not guaranteed to equal the requested state - verify the
# collected locations before relying on them as state names.
weather_data = []
for state in states:
    try:
        # Kept under its own name (not `data`) so the raw DataFrame loaded
        # earlier is not overwritten by a dict on every iteration.
        state_weather = get_current_weather(api_key, f"{state}, {country}")
        if state_weather:
            weather_data.append(state_weather)
        else:
            print(f" No data returned for {state}.")
    except Exception as e:
        # Best effort: one failing state must not abort the whole collection.
        print(f"Error fetching data for {state}: {e}")

# convert the collected records to a DataFrame and save as CSV
try:
    if weather_data:
        weather_df = pd.DataFrame(weather_data)
        weather_df.to_csv("2025_weather_data.csv", index=False)
        print("CSV file '2025_weather_data.csv' created successfully!")
    else:
        print("No weather data was collected, CSV file not created.")
except Exception as e:
    print(f"Error saving CSV file: {e}")

CSV file '2025_weather_data.csv' created successfully!


In [7]:
# Export the 2025 API results as JSON as well. Project requirement: both CSV
# and JSON formats must be supported (retrieved from either file dumps or API
# calls); the two data sources do not need to be merged but should be handled
# separately.

# orient="records" writes one JSON object per row; indent=4 pretty-prints the file.
weather_df.to_json("2025_weather_data.json", orient="records", indent=4)


Cleaning the Data to be Merged

In [8]:
# Labels removed from the 2016 summary: the "DE" and "VA" rows, "Puerto Rico",
# and the states listed after them.
# Author's note: the API frame has no Delaware, Florida, Indiana, Oregon or
# Washington rows.
dataset_exclusions = ["DE", "Puerto Rico", "VA", "Delaware", "Florida", "Indiana", "Oregon", "Washington", "Wyoming"]
keep_dataset_rows = ~grouped_df["Station.State"].isin(dataset_exclusions)
grouped_df2 = grouped_df.loc[keep_dataset_rows]

# Author's note: the API frame has repeated Michigan, Ohio and Pennsylvania
# rows, plus District of Columbia and Puerto Rico.
# Sort by location, drop the two non-state regions, then keep the first row
# of every repeated location.
weather_df = weather_df.sort_values(by="Location").reset_index(drop=True)
api_exclusions = ["District of Columbia", "Puerto Rico"]
weather_df2 = (
    weather_df.loc[~weather_df["Location"].isin(api_exclusions)]
    .drop_duplicates(subset="Location", keep="first")
    .reset_index(drop=True)
)


In [9]:
# Sanity check before merging: compare the set of states in each frame.
grouped_states = set(grouped_df2["Station.State"])
weather_states = set(weather_df2["Location"])

# States that appear on one side only.
missing_from_weather = grouped_states.difference(weather_states)
missing_from_grouped = weather_states.difference(grouped_states)

# Each line shows set() when nothing is missing on that side.
print("States in grouped_df but not in weather_df:", missing_from_weather)
print("States in weather_df but not in grouped_df:", missing_from_grouped)

States in grouped_df but not in weather_df: set()
States in weather_df but not in grouped_df: set()


In [10]:
# MERGING TO COMPARE MARCH 2016 TO 2025

# Suffix every column with its source dataset. The suffixed frames get their
# own names so grouped_df2 / weather_df2 stay untouched: reassigning them made
# this cell fail on a second run (columns became "*_dataset_dataset" and the
# merge keys below no longer existed).
dataset_side = grouped_df2.add_suffix("_dataset")
api_side = weather_df2.add_suffix("_api")

# Merge on state/location, then drop the API-side key column, which duplicates
# "Station.State_dataset" after an inner join.
final_df2 = pd.merge(
    dataset_side, api_side,
    left_on="Station.State_dataset", right_on="Location_api",
    how="inner"  # or "outer" if you want to keep unmatched states
).drop(columns=["Location_api"])

print(final_df2.head())
print(final_df2.tail())


  Station.State_dataset  Date.Year_dataset  Date.Month_dataset  \
0               Alabama               2016                   3   
1                Alaska               2016                   3   
2               Arizona               2016                   3   
3              Arkansas               2016                   3   
4            California               2016                   3   

   Data.Precipitation_dataset  Data.Temperature.Avg Temp_dataset  \
0                        1.30                              60.31   
1                        0.36                              31.48   
2                        0.02                              59.85   
3                        1.26                              55.00   
4                        0.84                              57.88   

   Data.Wind.Speed_dataset  Data.Wind.Direction_dataset          Time_api  \
0                     6.60                        19.88  2025-05-09 20:15   
1                     6.44              

In [11]:
# Save the merged dataset as a CSV file (row index omitted), then print its
# first rows.
final_df2.to_csv("final_weather_comparison.csv", index=False)
print(final_df2.head())

  Station.State_dataset  Date.Year_dataset  Date.Month_dataset  \
0               Alabama               2016                   3   
1                Alaska               2016                   3   
2               Arizona               2016                   3   
3              Arkansas               2016                   3   
4            California               2016                   3   

   Data.Precipitation_dataset  Data.Temperature.Avg Temp_dataset  \
0                        1.30                              60.31   
1                        0.36                              31.48   
2                        0.02                              59.85   
3                        1.26                              55.00   
4                        0.84                              57.88   

   Data.Wind.Speed_dataset  Data.Wind.Direction_dataset          Time_api  \
0                     6.60                        19.88  2025-05-09 20:15   
1                     6.44              