In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [23]:
file_path = "/content/public-disasters.csv"
df = pd.read_csv(file_path)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1066 entries, 0 to 1065
Data columns (total 29 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   DisNo.                       1066 non-null   object 
 1   Disaster Subgroup            1066 non-null   object 
 2   Disaster Type                1066 non-null   object 
 3   Disaster Subtype             1066 non-null   object 
 4   Event Name                   151 non-null    object 
 5   ISO                          1066 non-null   object 
 6   Country                      1066 non-null   object 
 7   Subregion                    1066 non-null   object 
 8   Region                       1066 non-null   object 
 9   Location                     1064 non-null   object 
 10  Origin                       360 non-null    object 
 11  Associated Types             441 non-null    object 
 12  AID Contribution ('000 US$)  14 non-null     float64
 13  Magnitude         

In [24]:
#remove COuntry India and Bermuda
df = df[~df['Country'].isin(['India', 'Bermuda'])]

In [25]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 682 entries, 1 to 1064
Data columns (total 29 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   DisNo.                       682 non-null    object 
 1   Disaster Subgroup            682 non-null    object 
 2   Disaster Type                682 non-null    object 
 3   Disaster Subtype             682 non-null    object 
 4   Event Name                   115 non-null    object 
 5   ISO                          682 non-null    object 
 6   Country                      682 non-null    object 
 7   Subregion                    682 non-null    object 
 8   Region                       682 non-null    object 
 9   Location                     681 non-null    object 
 10  Origin                       169 non-null    object 
 11  Associated Types             319 non-null    object 
 12  AID Contribution ('000 US$)  0 non-null      float64
 13  Magnitude               

In [26]:
df.head()

Unnamed: 0,DisNo.,Disaster Subgroup,Disaster Type,Disaster Subtype,Event Name,ISO,Country,Subregion,Region,Location,...,Start Day,End Year,End Month,End Day,Total Deaths,No. Injured,No. Affected,No. Homeless,Total Affected,Total Damage ('000 US$)
1,2000-0021-USA,Meteorological,Storm,Tornado,,USA,United States of America,Northern America,Americas,"Crittenden, Daviess, Webster districts (Kentuc...",...,2.0,2000,1,4.0,1.0,12.0,,150.0,162.0,210000.0
2,2000-0067-USA,Meteorological,Storm,Tornado,,USA,United States of America,Northern America,Americas,"Bullock, Montgomery districts (Alabama provinc...",...,13.0,2000,2,14.0,22.0,100.0,900.0,,1000.0,
3,2000-0080-USA,Hydrological,Flood,Riverine flood,,USA,United States of America,Northern America,Americas,"Kentucky, Ohio provinces",...,18.0,2000,2,22.0,3.0,,231.0,,231.0,
4,2000-0128-USA,Climatological,Wildfire,Forest fire,,USA,United States of America,Northern America,Americas,"Gainesville, Alachua areas (Alachua district, ...",...,26.0,2000,2,26.0,,,600.0,,600.0,
5,2000-0175-USA,Meteorological,Storm,Tornado,,USA,United States of America,Northern America,Americas,"Fort Worth Texas city (Tarrant district, Texas...",...,28.0,2000,3,28.0,5.0,,3000.0,300.0,3300.0,450000.0


In [27]:
import pandas as pd
from meteostat import Daily, Stations
from datetime import datetime

# Ensure essential columns exist
required_columns = ['Start Year', 'Start Month', 'Start Day']
for col in required_columns:
    if col not in df.columns:
        print(f"Missing column: {col}")
        raise KeyError(f"Data is missing required column: {col}")

# Fill missing values with default values
df['Start Day'].fillna(1, inplace=True)
df['Start Month'].fillna(1, inplace=True)
df.dropna(subset=['Start Year'], inplace=True)  # Drop rows with no year

# Convert to integers safely
df['Start Year'] = df['Start Year'].astype(int)
df['Start Month'] = df['Start Month'].astype(int)
df['Start Day'] = df['Start Day'].astype(int)

# Create a new column with a combined date format
df['Date'] = df[['Start Year', 'Start Month', 'Start Day']].apply(
    lambda x: '-'.join(x.astype(str)), axis=1
)

# Convert to datetime using stack/unstack approach
df['Date'] = pd.to_datetime(df['Date'], errors='coerce', format='%Y-%m-%d')

# Drop invalid dates
df.dropna(subset=['Date'], inplace=True)

# Print debug info
print("After datetime conversion:")
print(df[['Date']].head())

# Create an empty list to store weather data
weather_data = []

# Iterate through dataset
for index, row in df.iterrows():
    lat, lon = row['Latitude'], row['Longitude']

    if pd.notna(lat) and pd.notna(lon):  # Use coordinates if available
        try:
            # Find nearest weather station
            stations = Stations().nearby(lat, lon)
            station = stations.fetch(1)  # Get the closest station

            if not station.empty:
                station_id = station.index[0]
                date = row['Date']

                # Fetch weather data
                weather = Daily(station_id, start=date, end=date).fetch()

                if not weather.empty:
                    temp = weather['tavg'].values[0] if 'tavg' in weather.columns else None
                    wind = weather['wspd'].values[0] if 'wspd' in weather.columns else None
                    precip = weather['prcp'].values[0] if 'prcp' in weather.columns else None

                    weather_data.append({
                        'Index': index,
                        'Temperature (C)': temp,
                        'Wind Speed (km/h)': wind,
                        'Precipitation (mm)': precip
                    })
        except Exception as e:
            print(f"Error fetching weather for index {index}: {e}")
    else:
        # No coordinates available, set weather as None
        weather_data.append({'Index': index, 'Temperature (C)': None, 'Wind Speed (km/h)': None, 'Precipitation (mm)': None})

# Convert weather data to DataFrame
weather_df = pd.DataFrame(weather_data)

weather_df.to_csv("weather_data.csv", index=False)
#add index
weather_df['Index'] = weather_df.index
df['Index'] = df.index

# Merge weather data with original dataset
df = pd.merge(df, weather_df, on='Index', how='left')
# Save updated dataset
df.to_csv("disaster_data_with_weather.csv", index=False)

print("Weather data merged successfully!")


After datetime conversion:
        Date
1 2000-01-02
2 2000-02-13
3 2000-02-18
4 2000-02-26
5 2000-03-28
Weather data merged successfully!


In [28]:
#Add weather reports for each date
!pip install meteostat
from meteostat import Point, Daily
import datetime



In [30]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime

In [47]:
df = pd.read_csv('/content/disaster_data_with_weather.csv')

In [46]:
import requests
from bs4 import BeautifulSoup

def extract_climate_news(date, location):
    """Extracts climate-related news headlines for a given date and location.

    Args:
        date: A string representing the date in YYYY-MM-DD format.
        location: A string representing the location (e.g., "California").

    Returns:
        A list of news headlines.
    """

    query = f"climate change {location} {date}"
    url = f"https://news.google.com/search?q={query}&hl=en-US&gl=US&ceid=US%3Aen"

    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for bad status codes

        soup = BeautifulSoup(response.content, "html.parser")

        # Look for all anchor tags with href and within the main content
        headlines = [
            a.get_text().strip() for a in soup.find_all("a", href=True)
            if a.get_text().strip() and len(a.get_text().split()) > 3  # Exclude single words
        ]

        return headlines

    except requests.exceptions.RequestException as e:
        print(f"Error fetching news: {e}")
        return []



Climate-related news headlines for 2010-10-26 in California:
New U.S. Standards Take Aim at Truck Emissions and Fuel Economy (Published 2010)
Water, climate change, and sustainability in the southwest
The California Women's Conference
The Next Carbon Capture Tool Could be New, Improved Grass
Space tourism may ignite the effects of global warming
Camera traps snap first ever photo of Myanmar snub-nosed monkey
Provincetown Remembers Mary Oliver
George Soros gives $1m to California's pro-cannabis campaign
Elusive Charles Koch Deploys Security To Block Joel Francis’ Visit to Koch Industries HQ to Invite Debate on Prop 23
Los Angeles Lakers: 10 Ways They Can Win The 2010-2011 NBA Championship
Serving Up Feathered Bait to Attract Ecosystem Data (Published 2010)
Picture: new monkey discovered in Myanmar
Navajos Hope to Shift From Coal to Wind and Sun (Published 2010)
Biological shocker: snake reproduces asexually
Pelicans in Training (Published 2010)
Predators Gone, Small Fish Get Bolder (Pub

In [50]:
# Ensure 'Date' is in datetime format
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Apply the extract_climate_news function
df['News Headlines'] = df['Date'].apply(lambda date: extract_climate_news(date.strftime('%Y-%m-%d'), 'Location') if pd.notna(date) else [])
print(df.head())

          DisNo. Disaster Subgroup Disaster Type Disaster Subtype Event Name  \
0  2000-0021-USA    Meteorological         Storm          Tornado        NaN   
1  2000-0067-USA    Meteorological         Storm          Tornado        NaN   
2  2000-0080-USA      Hydrological         Flood   Riverine flood        NaN   
3  2000-0128-USA    Climatological      Wildfire      Forest fire        NaN   
4  2000-0175-USA    Meteorological         Storm          Tornado        NaN   

   ISO                   Country         Subregion    Region  \
0  USA  United States of America  Northern America  Americas   
1  USA  United States of America  Northern America  Americas   
2  USA  United States of America  Northern America  Americas   
3  USA  United States of America  Northern America  Americas   
4  USA  United States of America  Northern America  Americas   

                                            Location  ... No. Affected  \
0  Crittenden, Daviess, Webster districts (Kentuc...  ...   

In [52]:
new_df  = pd.read_csv('/content/disaster_data_with_weather.csv')



In [54]:
import ee
import geemap.core as geemap

In [55]:
ee.Authenticate()
ee.Initialize(project='gemini-genai-454500')

In [56]:
jan_2023_climate = (
    ee.ImageCollection('ECMWF/ERA5_LAND/MONTHLY_AGGR')
    .filterDate('2023-01', '2023-02')
    .first()
)
jan_2023_climate

In [57]:
m = geemap.Map(center=[30, 0], zoom=2)

vis_params = {
    'bands': ['temperature_2m'],
    'min': 229,
    'max': 304,
    'palette': 'inferno',
}
m.add_layer(jan_2023_climate, vis_params, 'Temperature (K)')
m

Map(center=[30, 0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_text…

In [60]:
%pip install -q --upgrade altair
import altair as alt
cities = ee.FeatureCollection([
    ee.Feature(ee.Geometry.Point(10.75, 59.91), {'city': 'Oslo'}),
    ee.Feature(ee.Geometry.Point(-118.24, 34.05), {'city': 'Los Angeles'}),
    ee.Feature(ee.Geometry.Point(103.83, 1.33), {'city': 'Singapore'}),
])
cities

In [61]:
city_climates = jan_2023_climate.reduceRegions(cities, ee.Reducer.first())

city_climates_dataframe = ee.data.computeFeatures(
    {'expression': city_climates, 'fileFormat': 'PANDAS_DATAFRAME'}
)
city_climates_dataframe

Unnamed: 0,geo,city,dewpoint_temperature_2m,dewpoint_temperature_2m_max,dewpoint_temperature_2m_min,evaporation_from_bare_soil_max,evaporation_from_bare_soil_min,evaporation_from_bare_soil_sum,evaporation_from_open_water_surfaces_excluding_oceans_max,evaporation_from_open_water_surfaces_excluding_oceans_min,...,volumetric_soil_water_layer_1_min,volumetric_soil_water_layer_2,volumetric_soil_water_layer_2_max,volumetric_soil_water_layer_2_min,volumetric_soil_water_layer_3,volumetric_soil_water_layer_3_max,volumetric_soil_water_layer_3_min,volumetric_soil_water_layer_4,volumetric_soil_water_layer_4_max,volumetric_soil_water_layer_4_min
0,"{'type': 'Point', 'coordinates': [10.75, 59.91]}",Oslo,268.522069,277.052826,255.988235,2.328306e-10,-2.8e-05,-0.000109,6.047194e-07,-2.328306e-10,...,0.240875,0.277464,0.339737,0.24205,0.288395,0.312698,0.26355,0.283256,0.291718,0.26976
1,"{'type': 'Point', 'coordinates': [-118.24, 34....",Los Angeles,279.018688,286.388687,265.190689,2.328306e-10,-0.000191,-0.016581,1.587765e-05,-0.0002093231,...,0.293442,0.378105,0.438995,0.267899,0.338553,0.41066,0.196136,0.217798,0.233704,0.208649
2,"{'type': 'Point', 'coordinates': [103.83, 1.33]}",Singapore,296.26942,298.315628,294.220779,0.0,-0.000365,-0.036704,1.164153e-10,-0.0001259167,...,0.393356,0.436111,0.520004,0.405411,0.436325,0.519806,0.401047,0.496528,0.510345,0.489883


In [62]:
alt.Chart(city_climates_dataframe).mark_bar(size=100).encode(
    alt.X('city:N', sort='y', axis=alt.Axis(labelAngle=0), title='City'),
    alt.Y('temperature_2m:Q', title='Temperature (K)'),
    tooltip=[
        alt.Tooltip('city:N', title='City'),
        alt.Tooltip('temperature_2m:Q', title='Temperature (K)'),
    ],
).properties(title='January 2023 temperature for selected cities', width=500)

In [63]:
# Create arbitrary constant images.
constant_1 = ee.Image(1)
constant_2 = ee.Image(2)

# Create a collection by giving a list to the constructor.
collection_from_constructor = ee.ImageCollection([constant_1, constant_2])
display('Collection from constructor:', collection_from_constructor)

# Create a collection with fromImages().
collection_from_images = ee.ImageCollection.fromImages(
    [ee.Image(3), ee.Image(4)]
)
display('Collection from images:', collection_from_images)

# Merge two collections.
merged_collection = collection_from_constructor.merge(collection_from_images)
display('Merged collection:', merged_collection)

# Create a toy FeatureCollection
features = ee.FeatureCollection(
    [ee.Feature(None, {'foo': 1}), ee.Feature(None, {'foo': 2})]
)

# Create an ImageCollection from the FeatureCollection
# by mapping a function over the FeatureCollection.
images = features.map(lambda feature: ee.Image(ee.Number(feature.get('foo'))))

# Display the resultant collection.
display('Image collection:', images)

'Collection from constructor:'

'Collection from images:'

'Merged collection:'

'Image collection:'

In [64]:
# All the GeoTiffs are in this folder.
uri_base = (
    'gs://gcp-public-data-landsat/LC08/01/001/002/'
    + 'LC08_L1GT_001002_20160817_20170322_01_T2/'
)

# List of URIs, one for each band.
uris = ee.List([
    uri_base + 'LC08_L1GT_001002_20160817_20170322_01_T2_B2.TIF',
    uri_base + 'LC08_L1GT_001002_20160817_20170322_01_T2_B3.TIF',
    uri_base + 'LC08_L1GT_001002_20160817_20170322_01_T2_B4.TIF',
    uri_base + 'LC08_L1GT_001002_20160817_20170322_01_T2_B5.TIF',
])

# Make a collection from the list of images.
images = uris.map(lambda uri: ee.Image.loadGeoTIFF(uri))
collection = ee.ImageCollection(images)

# Get an RGB image from the collection of bands.
rgb = collection.toBands().rename(['B2', 'B3', 'B4', 'B5'])
m = geemap.Map()
m.center_object(rgb)
m.add_layer(rgb, {'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 20000}, 'rgb')
m

Map(center=[0, 0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_text'…

In [None]:
'''
Satellites in interest
NASA/ORNL/DAYMET_V4 1981/01/01 - 2022/12/31
NOAA/CFSV2/FOR6H 1977/01/01 - 2022/12/31
ECMWF/CAMS/NRT - 2017/01/01 - 2022/12/31

TASK get their images , 3 images per satteliet per month, total 9 images per month, extract as jpg, save into google cloud bucket
'''


In [None]:
# prompt: ee.Initialize(project='gemini-genai-454500')
# # Define your Google Cloud Storage bucket and folder
# bucket_name = 'satellite-images-genai'  # Replace with your bucket name
# folder_name = 'satellite_images'
# # Define the satellite image collections and their date ranges
# satellite_collections = {
#     'NASA/ORNL/DAYMET_V4': ('1981-01-01', '2022-12-31'),
#     'NOAA/CFSV2/FOR6H': ('1977-01-01', '2022-12-31'),
#     'ECMWF/CAMS/NRT': ('2017-01-01', '2022-12-31'),
# }
# # Define the region for North America (bounding box)
# region = ee.Geometry.Rectangle([-170, 15, -50, 70])  # North America
# # Visualization parameters for specific datasets
# def get_vis_params(collection_id):
#     if collection_id == 'ECMWF/CAMS/NRT':
#         return {
#             'bands': ['total_aerosol_optical_depth_at_550nm_surface'],
#             'min': 0.0,
#             'max': 3.6,
#             'palette': [
#                 '5e4fa2', '3288bd', '66c2a5', 'abe0a4', 'e6f598', 'ffffbf',
#                 'fee08b', 'fdae61', 'f46d43', 'd53e4f', '9e0142'
#             ]
#         }
#     elif collection_id == 'NASA/ORNL/DAYMET_V4':
#         return {
#             'bands': ['tmax'],
#             'min': -40.0,
#             'max': 30.0,
#             'palette': ['1621A2', 'white', 'cyan', 'green', 'yellow', 'orange', 'red']
#         }
#     elif collection_id == 'NOAA/CFSV2/FOR6H':
#         return {
#             'bands': ['Temperature_height_above_ground'],
#             'min': 220.0,
#             'max': 310.0,
#             'palette': ['blue', 'purple', 'cyan', 'green', 'yellow', 'red']
#         }
#     else:
#         return {}
# TASK - Every month from metnioned dates, take 3 pictures of different dates from each satellite, total 9 images, save them as jpg in bucket with foldername satelliteimages/<year-month>

import ee
import datetime
import os

# Initialize Earth Engine
ee.Authenticate()
ee.Initialize(project='gemini-genai-454500')

# Define your Google Cloud Storage bucket and folder
bucket_name = 'satellite-images-genai'  # Replace with your bucket name
folder_name = 'satellite_images'

# Define the satellite image collections and their date ranges
satellite_collections = {
    'NASA/ORNL/DAYMET_V4': ('1981-01-01', '2022-12-31'),
    'NOAA/CFSV2/FOR6H': ('1977-01-01', '2022-12-31'),
    'ECMWF/CAMS/NRT': ('2017-01-01', '2022-12-31'),
}

# Define the region for North America (bounding box)
region = ee.Geometry.Rectangle([-170, 15, -50, 70])  # North America

# Visualization parameters for specific datasets
def get_vis_params(collection_id):
    if collection_id == 'ECMWF/CAMS/NRT':
        return {
            'bands': ['total_aerosol_optical_depth_at_550nm_surface'],
            'min': 0.0,
            'max': 3.6,
            'palette': [
                '5e4fa2', '3288bd', '66c2a5', 'abe0a4', 'e6f598', 'ffffbf',
                'fee08b', 'fdae61', 'f46d43', 'd53e4f', '9e0142'
            ]
        }
    elif collection_id == 'NASA/ORNL/DAYMET_V4':
        return {
            'bands': ['tmax'],
            'min': -40.0,
            'max': 30.0,
            'palette': ['1621A2', 'white', 'cyan', 'green', 'yellow', 'orange', 'red']
        }
    elif collection_id == 'NOAA/CFSV2/FOR6H':
        return {
            'bands': ['Temperature_height_above_ground'],
            'min': 220.0,
            'max': 310.0,
            'palette': ['blue', 'purple', 'cyan', 'green', 'yellow', 'red']
        }
    else:
        return {}


def export_satellite_images(collection_id, start_date, end_date, region):
  """Exports satellite images to Google Cloud Storage."""
  collection = ee.ImageCollection(collection_id).filterDate(start_date, end_date)
  vis_params = get_vis_params(collection_id)
  if not vis_params:
    print(f"No visualization parameters found for {collection_id}. Skipping...")
    return

  image_list = collection.toList(collection.size())

  for i in range(min(3, collection.size().getInfo())):
      image = ee.Image(image_list.get(i))
      date_str = image.date().format('YYYY-MM-dd').getInfo()
      year_month = date_str[:7]
      output_filename = f'{folder_name}/{year_month}/image_{collection_id[:4]}.jpg'


      task = ee.batch.Export.image.toCloudStorage(
          image=image.visualize(**vis_params),
          description = f"Exporting_{collection_id[:4]}"[:50],
          bucket=bucket_name,
          fileNamePrefix=output_filename,
          region=region.getInfo()['coordinates'],
          fileFormat='GEO_TIFF',
          maxPixels=1e13
      )
      task.start()
      print(f'Exporting {collection_id} image to {output_filename}...')




# Iterate through satellite collections and export images
for collection_id, (start_date, end_date) in satellite_collections.items():
  start_year = int(start_date[:4])
  end_year = int(end_date[:4])
  for year in range(start_year, end_year + 1):
    for month in range(1, 13):
      start_month_date = datetime.date(year, month, 1).strftime('%Y-%m-%d')
      end_month_date = (datetime.date(year, month + 1, 1)
                      if month < 12 else datetime.date(year + 1, 1, 1)).strftime('%Y-%m-%d')
      export_satellite_images(collection_id, start_month_date, end_month_date, region)


Exporting NASA/ORNL/DAYMET_V4 image to satellite_images/1981-01/image_NASA.jpg...
Exporting NASA/ORNL/DAYMET_V4 image to satellite_images/1981-01/image_NASA.jpg...
Exporting NASA/ORNL/DAYMET_V4 image to satellite_images/1981-01/image_NASA.jpg...
Exporting NASA/ORNL/DAYMET_V4 image to satellite_images/1981-02/image_NASA.jpg...
Exporting NASA/ORNL/DAYMET_V4 image to satellite_images/1981-02/image_NASA.jpg...
Exporting NASA/ORNL/DAYMET_V4 image to satellite_images/1981-02/image_NASA.jpg...
Exporting NASA/ORNL/DAYMET_V4 image to satellite_images/1981-03/image_NASA.jpg...
Exporting NASA/ORNL/DAYMET_V4 image to satellite_images/1981-03/image_NASA.jpg...
Exporting NASA/ORNL/DAYMET_V4 image to satellite_images/1981-03/image_NASA.jpg...
Exporting NASA/ORNL/DAYMET_V4 image to satellite_images/1981-04/image_NASA.jpg...
Exporting NASA/ORNL/DAYMET_V4 image to satellite_images/1981-04/image_NASA.jpg...
Exporting NASA/ORNL/DAYMET_V4 image to satellite_images/1981-04/image_NASA.jpg...
Exporting NASA/O