In [1]:
import pandas as pd

# Load the data
file_path = 'May_to_Sep_temp.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the data to understand its structure
data.head()


Unnamed: 0,station,date,max_tmpf,min_tmpf,name,geometry
0,MN001,2023-05-01,55.94002,37.94002,Twin Lakes I-35 Mile Post 1,"{'type': 'Point', 'coordinates': [-93.35405731..."
1,MN002,2023-05-01,49.46001,37.040024,Silver Lake TH 7 Mile Post 1,"{'type': 'Point', 'coordinates': [-94.1191, 44..."
2,MN003,2023-05-01,55.21998,39.19999,Little Chicago I-35 Mile Post 70,"{'type': 'Point', 'coordinates': [-93.29242706..."
3,MN004,2023-05-01,52.519978,38.299988,Rush City I-35 Mile Post 157,"{'type': 'Point', 'coordinates': [-92.99275207..."
4,MN005,2023-05-01,50.18,41.89999,Rutledge I-35 Mile Post 198,"{'type': 'Point', 'coordinates': [-92.83856201..."


In [6]:
def sort_by_station_and_date(df):
    # Sort the DataFrame by 'station' and 'date' columns
    sorted_df = df.sort_values(by=['station', 'date'])
    return sorted_df

# Applying the function to sort by station and date
sorted_data = sort_by_station_and_date(data)

# Saving the sorted data to a CSV file
sorted_file_path = 'sorted_tem_data_by_station_and_date.csv'
sorted_data.to_csv(sorted_file_path, index=False)

sorted_file_path



'sorted_tem_data_by_station_and_date.csv'

In [8]:
import pandas as pd

# Read the CSV file
df = pd.read_csv('sorted_tem_data_by_station_and_date.csv')

# Function to extract X and Y coordinates from geometry column
def extract_coordinates(geometry):
    if pd.isna(geometry):
        return [None, None]
    else:
        coordinates = eval(geometry)['coordinates']
        return coordinates if len(coordinates) == 2 else [None, None]

# Apply function to extract X and Y coordinates
df[['X', 'Y']] = pd.DataFrame(df['geometry'].apply(extract_coordinates).tolist())

# Drop the original 'geometry' column
df.drop(columns=['geometry'], inplace=True)

# Save the DataFrame to a new CSV file
csv_file_path = 'data_with_XY_coordinates.csv'
df.to_csv(csv_file_path, index=False)

print(f"DataFrame saved to CSV file: {csv_file_path}")


DataFrame saved to CSV file: data_with_XY_coordinates.csv


In [26]:
def create_station_datasets(df, station_column):
    # Dictionary to store DataFrames for each station
    station_dfs = {}
    
    # Iterate over each unique station ID
    for station in df[station_column].unique():
        # Filter the DataFrame for the current station and save it to the dictionary
        station_df = df[df[station_column] == station]
        station_dfs[station] = station_df
        
        # Optionally, save each station's DataFrame to a CSV file
        file_path = f'Sorted data GDD/station_{station}.csv'
        station_df.to_csv(file_path, index=False)
        print(f'Saved: {file_path}')
        
    return station_dfs

# Load the original dataset
file_path = 'data_with_XY_coordinates.csv'
data = pd.read_csv(file_path)


In [10]:
import pandas as pd

# Assuming this function needs to be defined:
def celsius_to_fahrenheit(temp_c):
    return (temp_c * 9/5) + 32


class CornGDD:
    def __init__(self, daily_temp_df, time_column, min_temp_column, max_temp_column, fahrenheit=True):
        self.daily_temp_df = daily_temp_df
        self.time_column = time_column
        self.min_temp_column = min_temp_column
        self.max_temp_column = max_temp_column
        self.fahrenheit = fahrenheit

        # Ensure the time column is of datetime type
        self.daily_temp_df[self.time_column] = pd.to_datetime(self.daily_temp_df[self.time_column])

        # Convert the temperature to Celsius if required
        if not self.fahrenheit:
            self.daily_temp_df[self.min_temp_column] = self.daily_temp_df[self.min_temp_column].apply(celsius_to_fahrenheit)
            self.daily_temp_df[self.max_temp_column] = self.daily_temp_df[self.max_temp_column].apply(celsius_to_fahrenheit)
    
    def con_gdd(self, start_date, end_date):
        start_date = pd.Timestamp(start_date)
        end_date = pd.Timestamp(end_date)

        date_mask = (self.daily_temp_df[self.time_column] >= start_date) & (self.daily_temp_df[self.time_column] <= end_date)
        masked_data = self.daily_temp_df.loc[date_mask].copy()

        gdd_values = []
        cumulative_gdd = 0

        for index, row in masked_data.iterrows():
            min_temp = max(row[self.min_temp_column], 50 if self.fahrenheit else 10)
            max_temp = min(row[self.max_temp_column], 86 if self.fahrenheit else 30)

            mean_temp = (min_temp + max_temp) / 2
            base_temp = 50 if self.fahrenheit else 10
            gdd = max(mean_temp - base_temp, 0)

            gdd_values.append(gdd)
            cumulative_gdd += gdd

        masked_data['GDD'] = gdd_values
        masked_data['Cumulative_GDD'] = masked_data['GDD'].cumsum()

        return masked_data


In [11]:
MN001 = pd.read_csv('Sorted data GDD/station_MN001.csv')
MN002 = pd.read_csv('Sorted data GDD/station_MN002.csv')
MN003 = pd.read_csv('Sorted data GDD/station_MN003.csv')
MN004 = pd.read_csv('Sorted data GDD/station_MN004.csv')
MN005 = pd.read_csv('Sorted data GDD/station_MN005.csv')
MN006 = pd.read_csv('Sorted data GDD/station_MN006.csv')
MN007 = pd.read_csv('Sorted data GDD/station_MN007.csv')
MN008 = pd.read_csv('Sorted data GDD/station_MN008.csv')
MN009 = pd.read_csv('Sorted data GDD/station_MN009.csv')
MN010 = pd.read_csv('Sorted data GDD/station_MN010.csv')
MN011 = pd.read_csv('Sorted data GDD/station_MN011.csv')
MN012 = pd.read_csv('Sorted data GDD/station_MN012.csv')
MN013 = pd.read_csv('Sorted data GDD/station_MN013.csv')
MN014 = pd.read_csv('Sorted data GDD/station_MN014.csv')
MN015 = pd.read_csv('Sorted data GDD/station_MN015.csv')
MN016 = pd.read_csv('Sorted data GDD/station_MN016.csv')
MN017 = pd.read_csv('Sorted data GDD/station_MN017.csv')
MN018 = pd.read_csv('Sorted data GDD/station_MN018.csv')
MN019 = pd.read_csv('Sorted data GDD/station_MN019.csv')
MN020 = pd.read_csv('Sorted data GDD/station_MN020.csv')
MN021 = pd.read_csv('Sorted data GDD/station_MN021.csv')
MN022 = pd.read_csv('Sorted data GDD/station_MN022.csv')
MN023 = pd.read_csv('Sorted data GDD/station_MN023.csv')
MN024 = pd.read_csv('Sorted data GDD/station_MN024.csv')
MN025 = pd.read_csv('Sorted data GDD/station_MN025.csv')
MN026 = pd.read_csv('Sorted data GDD/station_MN026.csv')
MN027 = pd.read_csv('Sorted data GDD/station_MN027.csv')
MN028 = pd.read_csv('Sorted data GDD/station_MN028.csv')
MN029 = pd.read_csv('Sorted data GDD/station_MN029.csv')
MN030 = pd.read_csv('Sorted data GDD/station_MN030.csv')
MN031 = pd.read_csv('Sorted data GDD/station_MN031.csv')
MN032 = pd.read_csv('Sorted data GDD/station_MN032.csv')
MN033 = pd.read_csv('Sorted data GDD/station_MN033.csv')
MN034 = pd.read_csv('Sorted data GDD/station_MN034.csv')
MN035 = pd.read_csv('Sorted data GDD/station_MN035.csv')
MN036 = pd.read_csv('Sorted data GDD/station_MN036.csv')
MN037 = pd.read_csv('Sorted data GDD/station_MN037.csv')
MN038 = pd.read_csv('Sorted data GDD/station_MN038.csv')
MN039 = pd.read_csv('Sorted data GDD/station_MN039.csv')
MN040 = pd.read_csv('Sorted data GDD/station_MN040.csv')
MN041 = pd.read_csv('Sorted data GDD/station_MN041.csv')
MN042 = pd.read_csv('Sorted data GDD/station_MN042.csv')
MN043 = pd.read_csv('Sorted data GDD/station_MN043.csv')
MN044 = pd.read_csv('Sorted data GDD/station_MN044.csv')
MN045 = pd.read_csv('Sorted data GDD/station_MN045.csv')
MN046 = pd.read_csv('Sorted data GDD/station_MN046.csv')
MN047 = pd.read_csv('Sorted data GDD/station_MN047.csv')
MN048 = pd.read_csv('Sorted data GDD/station_MN048.csv')
MN049 = pd.read_csv('Sorted data GDD/station_MN049.csv')
MN050 = pd.read_csv('Sorted data GDD/station_MN050.csv')
MN051 = pd.read_csv('Sorted data GDD/station_MN051.csv')
MN052 = pd.read_csv('Sorted data GDD/station_MN052.csv')
MN053 = pd.read_csv('Sorted data GDD/station_MN053.csv')
MN054 = pd.read_csv('Sorted data GDD/station_MN054.csv')
MN055 = pd.read_csv('Sorted data GDD/station_MN055.csv')
MN056 = pd.read_csv('Sorted data GDD/station_MN056.csv')
MN057 = pd.read_csv('Sorted data GDD/station_MN057.csv')
MN058 = pd.read_csv('Sorted data GDD/station_MN058.csv')
MN059 = pd.read_csv('Sorted data GDD/station_MN059.csv')
MN060 = pd.read_csv('Sorted data GDD/station_MN060.csv')
MN061 = pd.read_csv('Sorted data GDD/station_MN061.csv')
MN062 = pd.read_csv('Sorted data GDD/station_MN062.csv')
MN063 = pd.read_csv('Sorted data GDD/station_MN063.csv')
MN064 = pd.read_csv('Sorted data GDD/station_MN064.csv')
MN065 = pd.read_csv('Sorted data GDD/station_MN065.csv')
MN066 = pd.read_csv('Sorted data GDD/station_MN066.csv')
MN067 = pd.read_csv('Sorted data GDD/station_MN067.csv')
MN068 = pd.read_csv('Sorted data GDD/station_MN068.csv')
MN069 = pd.read_csv('Sorted data GDD/station_MN069.csv')
MN070 = pd.read_csv('Sorted data GDD/station_MN070.csv')
MN071 = pd.read_csv('Sorted data GDD/station_MN071.csv')
MN072 = pd.read_csv('Sorted data GDD/station_MN072.csv')
MN073 = pd.read_csv('Sorted data GDD/station_MN073.csv')
MN074 = pd.read_csv('Sorted data GDD/station_MN074.csv')
MN075 = pd.read_csv('Sorted data GDD/station_MN075.csv')
MN076 = pd.read_csv('Sorted data GDD/station_MN076.csv')
MN077 = pd.read_csv('Sorted data GDD/station_MN077.csv')
MN078 = pd.read_csv('Sorted data GDD/station_MN078.csv')
MN079 = pd.read_csv('Sorted data GDD/station_MN079.csv')
MN080 = pd.read_csv('Sorted data GDD/station_MN080.csv')
MN081 = pd.read_csv('Sorted data GDD/station_MN081.csv')
MN082 = pd.read_csv('Sorted data GDD/station_MN082.csv')
MN083 = pd.read_csv('Sorted data GDD/station_MN083.csv')
MN084 = pd.read_csv('Sorted data GDD/station_MN084.csv')
MN085 = pd.read_csv('Sorted data GDD/station_MN085.csv')
MN086 = pd.read_csv('Sorted data GDD/station_MN086.csv')
MN087 = pd.read_csv('Sorted data GDD/station_MN087.csv')
MN088 = pd.read_csv('Sorted data GDD/station_MN088.csv')
MN089 = pd.read_csv('Sorted data GDD/station_MN089.csv')
MN090 = pd.read_csv('Sorted data GDD/station_MN090.csv')
MN091 = pd.read_csv('Sorted data GDD/station_MN091.csv')
MN092 = pd.read_csv('Sorted data GDD/station_MN092.csv')
MN093 = pd.read_csv('Sorted data GDD/station_MN093.csv')
MN094 = pd.read_csv('Sorted data GDD/station_MN094.csv')
MN095 = pd.read_csv('Sorted data GDD/station_MN095.csv')
MN096 = pd.read_csv('Sorted data GDD/station_MN096.csv')
MN097 = pd.read_csv('Sorted data GDD/station_MN097.csv')
MN098 = pd.read_csv('Sorted data GDD/station_MN098.csv')
MN100 = pd.read_csv('Sorted data GDD/station_MN100.csv')
MN101 = pd.read_csv('Sorted data GDD/station_MN101.csv')
MN103 = pd.read_csv('Sorted data GDD/station_MN103.csv')
MN105 = pd.read_csv('Sorted data GDD/station_MN105.csv')
MN106 = pd.read_csv('Sorted data GDD/station_MN106.csv')
MN108 = pd.read_csv('Sorted data GDD/station_MN108.csv')
MN111 = pd.read_csv('Sorted data GDD/station_MN111.csv')
MN113 = pd.read_csv('Sorted data GDD/station_MN113.csv')
MN114 = pd.read_csv('Sorted data GDD/station_MN114.csv')
MN115 = pd.read_csv('Sorted data GDD/station_MN115.csv')
MN116 = pd.read_csv('Sorted data GDD/station_MN116.csv')
MN117 = pd.read_csv('Sorted data GDD/station_MN117.csv')
MN118 = pd.read_csv('Sorted data GDD/station_MN118.csv')
MN119 = pd.read_csv('Sorted data GDD/station_MN119.csv')
MN120 = pd.read_csv('Sorted data GDD/station_MN120.csv')
MN121 = pd.read_csv('Sorted data GDD/station_MN121.csv')
MN122 = pd.read_csv('Sorted data GDD/station_MN122.csv')
MN124 = pd.read_csv('Sorted data GDD/station_MN124.csv')
MN125 = pd.read_csv('Sorted data GDD/station_MN125.csv')
MN127 = pd.read_csv('Sorted data GDD/station_MN127.csv')
MN128 = pd.read_csv('Sorted data GDD/station_MN128.csv')
MN129 = pd.read_csv('Sorted data GDD/station_MN129.csv')
MN130 = pd.read_csv('Sorted data GDD/station_MN130.csv')
MN131 = pd.read_csv('Sorted data GDD/station_MN131.csv')
MN132 = pd.read_csv('Sorted data GDD/station_MN132.csv')
MN133 = pd.read_csv('Sorted data GDD/station_MN133.csv')
MN134 = pd.read_csv('Sorted data GDD/station_MN134.csv')
MN135 = pd.read_csv('Sorted data GDD/station_MN135.csv')
MN136 = pd.read_csv('Sorted data GDD/station_MN136.csv')
MN137 = pd.read_csv('Sorted data GDD/station_MN137.csv')
MN138 = pd.read_csv('Sorted data GDD/station_MN138.csv')
MN139 = pd.read_csv('Sorted data GDD/station_MN139.csv')
MN140 = pd.read_csv('Sorted data GDD/station_MN140.csv')
MN141 = pd.read_csv('Sorted data GDD/station_MN141.csv')
MN142 = pd.read_csv('Sorted data GDD/station_MN142.csv')
MN143 = pd.read_csv('Sorted data GDD/station_MN143.csv')
MN144 = pd.read_csv('Sorted data GDD/station_MN144.csv')
MN145 = pd.read_csv('Sorted data GDD/station_MN145.csv')
MN146 = pd.read_csv('Sorted data GDD/station_MN146.csv')
MN147 = pd.read_csv('Sorted data GDD/station_MN147.csv')
MN148 = pd.read_csv('Sorted data GDD/station_MN148.csv')
MN149 = pd.read_csv('Sorted data GDD/station_MN149.csv')
MN150 = pd.read_csv('Sorted data GDD/station_MN150.csv')
MN151 = pd.read_csv('Sorted data GDD/station_MN151.csv')
MN152 = pd.read_csv('Sorted data GDD/station_MN152.csv')
MN153 = pd.read_csv('Sorted data GDD/station_MN153.csv')
MN154 = pd.read_csv('Sorted data GDD/station_MN154.csv')
MN155 = pd.read_csv('Sorted data GDD/station_MN155.csv')
MN156 = pd.read_csv('Sorted data GDD/station_MN156.csv')
MN157 = pd.read_csv('Sorted data GDD/station_MN157.csv')
MN158 = pd.read_csv('Sorted data GDD/station_MN158.csv')
MN159 = pd.read_csv('Sorted data GDD/station_MN159.csv')
MN160 = pd.read_csv('Sorted data GDD/station_MN160.csv')
MN161 = pd.read_csv('Sorted data GDD/station_MN161.csv')
MN162 = pd.read_csv('Sorted data GDD/station_MN162.csv')


In [14]:
#Input data using CornGDD instance for all 16 weather stations
corn_MN001 = CornGDD(MN001, time_column = 'date', min_temp_column = 'min_tmpf',max_temp_column = 'max_tmpf')
start_date = pd.to_datetime("2023-05-01").date()
end_date = pd.to_datetime("2023-09-30").date()
corn_gdd_MN001 = corn_MN001.con_gdd(start_date, end_date)

In [52]:
#cumulative GDD and Accumulated GDD are same

In [28]:
print(results_df.columns)


Index(['Station', 'GDD'], dtype='object')


In [33]:
import pandas as pd

# Dictionary to store GDD results for each station
corn_gdd_dict = {}

# Loop through each station
for station_id in range(1, 163):
    station_id_str = f"MN{station_id:03d}"  # Format station ID with leading zeros if needed
    file_path = f'Sorted data GDD/station_{station_id_str}.csv'
    
    try:
        # Read CSV file into DataFrame
        station_data = pd.read_csv(file_path)
        
        # Set up CornGDD instance for the current station
        corn_station = CornGDD(station_data, time_column='date', min_temp_column='min_tmpf', max_temp_column='max_tmpf')
        
        # Set start and end dates
        start_date = pd.to_datetime("2023-05-01").date()
        end_date = pd.to_datetime("2023-09-30").date()
        
        # Calculate GDD
        corn_gdd = corn_station.con_gdd(start_date, end_date)
        
        # Store GDD in the dictionary with station ID as key
        corn_gdd_dict[station_id_str] = corn_gdd
    
    except FileNotFoundError:
        print(f"File not found for station {station_id_str}. Skipping...")
        continue


File not found for station MN099. Skipping...
File not found for station MN102. Skipping...
File not found for station MN104. Skipping...
File not found for station MN107. Skipping...
File not found for station MN109. Skipping...
File not found for station MN110. Skipping...
File not found for station MN112. Skipping...
File not found for station MN123. Skipping...
File not found for station MN126. Skipping...


In [37]:
# Loop through each station and save its data to a CSV file
for station_id, gdd_data in corn_gdd_dict.items():
    file_name = f'{station_id}_gdd.csv'
    file_path = f'GDD_results/{file_name}'
    gdd_data.to_csv(file_path, index=False)


In [41]:
import pandas as pd

# Initialize an empty list to store data for September 30th from all stations
all_data_0930 = []

# Loop through each station's GDD data
for station_id, gdd_data in corn_gdd_dict.items():
    # Extract data for September 30th
    gdd_data_0930 = gdd_data.loc[gdd_data.index == "2023-09-30"]
    
    # Add station ID as a column
    gdd_data_0930["Station_ID"] = station_id
    
    # Append the extracted data to the list
    all_data_0930.append(gdd_data_0930)

# Concatenate all the extracted data into one DataFrame
combined_data = pd.concat(all_data_0930)

# Reset the index of the combined DataFrame
combined_data.reset_index(drop=True, inplace=True)

# Save the combined data to a CSV file
combined_file_path = 'GDD_results/combined_data_0930.csv'
combined_data.to_csv(combined_file_path, index=False)


In [43]:
import pandas as pd

# Initialize an empty DataFrame to store data for September 30th from all stations
combined_data = pd.DataFrame()

# Loop through each station's GDD data
for station_id, gdd_data in corn_gdd_dict.items():
    # Filter data for September 30th
    gdd_data_0930 = gdd_data[gdd_data['date'] == "2023-09-30"]
    
    # Add station ID as a column
    gdd_data_0930["Station_ID"] = station_id
    
    # Append the filtered data to the combined DataFrame
    combined_data = pd.concat([combined_data, gdd_data_0930])

# Reset the index of the combined DataFrame
combined_data.reset_index(drop=True, inplace=True)

# Save the combined data to a CSV file
combined_file_path = 'GDD_results/combined_data_0930.csv'
combined_data.to_csv(combined_file_path, index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdd_data_0930["Station_ID"] = station_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdd_data_0930["Station_ID"] = station_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdd_data_0930["Station_ID"] = station_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[