Script for cleaning NOAA datasets
- Larry Horowitz
- larry.horowitz@noaa.gov
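- Longitude convention: the files store longitude on a 0–360 scale; subtract 360 to get the signed value, e.g. lon = 261.875 is equivalent to 261.875 - 360 = -98.125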

Four Scenarios:
1. Scenario: RCP2.6_SSP1
2. Scenario: RCP4.5_SSP2
3. Scenario: RCP7.0_SSP3
4. Scenario: RCP8.5_SSP5

Naming Convention:
- Crop 1:
    - Rio Grande Valley Sector Texas
    - Latitude: 26.051824
    - Longitude: -98.086728

- Crop 2:
    - Tucson Valley Sector
    - Latitude: 31.265639
    - Longitude: -110.897629

- Crop 3:
    - San Diego Sector
    - Latitude: 32.519967
    - Longitude: -116.850982

In [65]:
# Libraries
import pandas as pd
import matplotlib.pyplot as plt

In [66]:
# Load the SSP5-8.5 monthly TAS extract for crop 3 (201501-210012).
# NOTE(review): the frame is called `rio_grande_df`, but the "crop3" file maps to
# the San Diego Sector in the naming convention at the top of this notebook, and
# the last cell writes a "san_diego_final_..." file. Consider renaming the
# variable consistently in every cell.
rio_grande_df = pd.read_csv(
    filepath_or_buffer='RCP8.5_SSP5/tas_Amon_GFDL-ESM4_ssp585_r1i1p1f1_gr1_201501-210012.crop3.csv',
)

In [67]:
# Preview the first five rows of the freshly loaded frame.
rio_grande_df.head(n=5)

Unnamed: 0,YYYYMM,LAT,LON,TAS
0,201501,30.5,241.88,287.22
1,201501,30.5,243.13,287.22
2,201501,30.5,244.38,284.6
3,201501,31.5,241.88,286.81
4,201501,31.5,243.13,285.87


In [68]:
# Column dtypes as loaded. Note that the LAT / LON / TAS headers carry a leading
# space (' LAT', ' LON', ' TAS'), which later cells rely on when selecting them.
rio_grande_df.dtypes

YYYYMM      int64
 LAT      float64
 LON      float64
 TAS      float64
dtype: object

In [69]:
# Build the cleaned frame in one non-mutating chain: the name is only rebound
# once every step has succeeded, so the loaded frame is never left
# half-transformed.
# NOTE(review): the cell still rebinds `rio_grande_df`, so re-running it without
# first re-running the load cell fails (the raw column names are gone by then).
rio_grande_df = (
    rio_grande_df
    .assign(
        # YYYYMM integers (e.g. 201501) -> datetime -> 'DD-MM-YYYY' text.
        # The stored column is a string (object dtype), not a datetime.
        YYYYMM=lambda df: pd.to_datetime(df['YYYYMM'], format='%Y%m').dt.strftime('%d-%m-%Y'),
        # TAS is treated as Kelvin: Celsius = K - 273.15
        temp_celsius=lambda df: df[' TAS'] - 273.15,
        # Fahrenheit = K * 9/5 - 459.67
        temp_fahrenheit=lambda df: df[' TAS'] * (9/5) - 459.67,
        # Shift longitude by -360: lon = 261.875 is equivalent to 261.875 - 360 = -98.125
        lon_real=lambda df: df[' LON'] - 360,
    )
    # The source headers for LAT / LON / TAS carry a leading space.
    # temp_celsius and temp_fahrenheit already have their final names.
    .rename(columns={'YYYYMM': 'Date',
                     ' LAT': 'old_Latitude',
                     ' LON': 'old_Longitude',
                     ' TAS': 'Temperature_at_atm',
                     'lon_real': 'new_Longitude'})
)
# Printing data types (the print is only a visual separator in the cell output)
print("///////////")
rio_grande_df.dtypes

///////////


Date                   object
old_Latitude          float64
old_Longitude         float64
Temperature_at_atm    float64
temp_celsius          float64
temp_fahrenheit       float64
new_Longitude         float64
dtype: object

In [70]:
# Show the transformed frame as the cell output; the rendered table is
# abbreviated with a '...' row. The print is only a visual separator.
print("///////////")
rio_grande_df

///////////


Unnamed: 0,Date,old_Latitude,old_Longitude,Temperature_at_atm,temp_celsius,temp_fahrenheit,new_Longitude
0,01-01-2015,30.5,241.88,287.22,14.07,57.326,-118.12
1,01-01-2015,30.5,243.13,287.22,14.07,57.326,-116.87
2,01-01-2015,30.5,244.38,284.60,11.45,52.610,-115.62
3,01-01-2015,31.5,241.88,286.81,13.66,56.588,-118.12
4,01-01-2015,31.5,243.13,285.87,12.72,54.896,-116.87
...,...,...,...,...,...,...,...
12379,01-12-2100,32.5,243.13,288.03,14.88,58.784,-116.87
12380,01-12-2100,32.5,244.38,285.96,12.81,55.058,-115.62
12381,01-12-2100,33.5,241.88,289.23,16.08,60.944,-118.12
12382,01-12-2100,33.5,243.13,284.53,11.38,52.484,-116.87


In [71]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns. 'Date' is object dtype at this point and does not appear in the table.
print("///////////")
rio_grande_df.describe()

///////////


Unnamed: 0,old_Latitude,old_Longitude,Temperature_at_atm,temp_celsius,temp_fahrenheit,new_Longitude
count,12384.0,12384.0,12384.0,12384.0,12384.0,12384.0
mean,32.0,243.13,292.821021,19.671021,67.407837,-116.87
std,1.118079,1.020662,5.838873,5.838873,10.509972,1.020662
min,30.5,241.88,276.88,3.73,38.714,-118.12
25%,31.25,241.88,288.64,15.49,59.882,-118.12
50%,32.0,243.13,292.04,18.89,66.002,-116.87
75%,32.75,244.38,296.91,23.76,74.768,-115.62
max,33.5,244.38,313.87,40.72,105.296,-115.62


In [72]:
# Write the cleaned frame to disk; index=False leaves the row indices out of the file.
rio_grande_df.to_csv(
    path_or_buf="san_diego_final_RCP8.5_SSP5.csv",
    index=False,
)