In [15]:
# The California Irrigation Management Information System (CIMIS) is not a great source for weather data.
# Much of the data collection is not consistent across CA stations.  Other weather data sources will be explored.
# The introductory EDA for CIMIS is below.

import pandas as pd
import numpy as np

In [16]:
# Very small subset of data that was webscraped from https://eyes.nasa.gov/dsn/dsn.html
# NOTE(review): the file name carries 06_25_2019, presumably the scrape date; the rows
# displayed later in this notebook are stamped 2013-04-04 -- confirm.
rawdata06252019 = pd.read_csv('DSN_webdata_06_25_2019.csv')

In [17]:
# Dimensions of the scraped DSN frame as (rows, columns).
rawdata06252019.shape

(24611, 28)

In [18]:
# In what order do the features appear in the dataframe?
# Collect the column labels into a plain Python list and confirm its type.
feature_list = rawdata06252019.columns.tolist()
print(type(feature_list))

<class 'list'>


In [6]:
# Show the column labels in dataframe order.
feature_list

['spacecraft_id',
 'spacecraft',
 'station',
 'friendlyName',
 'timeZoneOffset',
 'name_x',
 'azimuthAngle',
 'elevationAngle',
 'created',
 'isArray',
 'isDDOR',
 'isMSPA',
 'timeUTC',
 'updated',
 'windSpeed',
 'dataRate_x',
 'frequency_x',
 'power_x',
 'signalType_x',
 'signalTypeDebug_x',
 'dataRate_y',
 'frequency_y',
 'power_y',
 'signalType_y',
 'signalTypeDebug_y',
 'downlegRange',
 'rtlt',
 'uplegRange']

In [19]:
# Compare wind speed and timestamp data from the DSN dishes with the surrounding California weather station sensors.
# Since there is no API for the Deep Space Network data, I will have to webscrape specific days
# on both ends of the weather spectrum: intense weather and calm weather.
# This will give me a balanced subset for comparison.
# I will use the non-NASA weather data to identify categories of weather conditions based on the
# Marine-Beaufort wind scale.

rawdata06252019['windSpeed'].describe()   
# This is an extremely small subset of DSN data.  I need years, not seconds, to get a meaningful average.

# The Palmdale and Victorville weather stations have mean wind speeds of around 4 to 5 mph (6.5-8 kph).


count    22655.000000
mean        20.893511
std         15.282240
min          0.000000
25%          3.088667
50%         25.327067
75%         33.357601
max         53.742802
Name: windSpeed, dtype: float64

In [26]:
# Daily weather files for three stations surrounding the Goldstone, California
# Deep Space Network complex, read in the order Palmdale, Victorville, Bakersfield.
# ("Pamdale" is the spelling used by the file on disk and by the variable name.)
Pamdale_Weather_Data, Victorville_Weather_Data, Bakersfield_Weather_Data = (
    pd.read_csv(csv_name)
    for csv_name in (
        'Weather_data_Pamdale_CA.csv',
        'Victorville_Weather_Data.csv',
        'Bakersfield_Weather_Data.csv',
    )
)

In [21]:
# Summary statistics for the Palmdale station file.
# In this extract `speed` is populated for 5008 of 5288 rows, while
# type, obs, Wx, Bulb wet, dry and Evap have no values at all.
Pamdale_Weather_Data.describe()

Unnamed: 0,Date,Time,Precip,type,Air max,min,obs,Wx,speed,Bulb wet,dry,Soil max,min.1,Evap,Solar,ETo,RH max,min.2
count,5288.0,5288.0,5288.0,0.0,5288.0,5288.0,0.0,0.0,5008.0,0.0,0.0,5010.0,5010.0,0.0,5240.0,5130.0,5052.0,5052.0
mean,20118140.0,2359.0,0.014964,,75.845121,43.228253,,,4.057508,,,58.417964,57.211577,,476.121565,0.188402,76.757363,26.004454
std,41831.27,0.0,0.087346,,15.307195,13.015635,,,4.189953,,,10.112886,10.003011,,173.21572,0.095918,13.668771,14.621722
min,20050100.0,2359.0,0.0,,36.0,9.0,,,0.0,,,37.0,36.0,,27.0,0.0,22.5,4.0
25%,20080810.0,2359.0,0.0,,64.0,33.0,,,0.0,,,49.0,48.0,,322.75,0.1,68.0,15.0
50%,20120330.0,2359.0,0.0,,76.0,42.0,,,4.0,,,58.0,57.0,,487.0,0.19,79.0,21.8
75%,20151110.0,2359.0,0.0,,89.0,53.0,,,7.0,,,68.0,66.0,,636.0,0.27,87.0,32.425
max,20190620.0,2359.0,1.79,,117.0,78.0,,,19.0,,,80.0,79.0,,790.0,0.45,100.0,94.0


In [26]:
# Summary statistics for the Victorville station file.
# In this extract `speed` is populated for 8731 of 9306 rows, and Time is
# not always 2359 (its minimum is 800).
Victorville_Weather_Data.describe()

Unnamed: 0,Date,Time,Precip,type,Air max,min,obs,Wx,speed,Bulb wet,dry,Soil max,min.1,Evap,Solar,ETo,RH max,min.2
count,9306.0,9306.0,9306.0,0.0,9306.0,9306.0,0.0,0.0,8731.0,0.0,0.0,8586.0,8586.0,0.0,8880.0,8902.0,8812.0,8812.0
mean,20063110.0,2341.171395,0.016914,,75.230819,46.250161,,,4.758332,,,60.209527,57.239809,,491.052477,0.187754,70.515536,23.675704
std,73572.48,165.526295,0.11707,,15.379925,12.622403,,,3.673877,,,9.971982,9.666623,,175.871454,0.095423,18.320006,15.462008
min,19940100.0,800.0,0.0,,34.0,11.0,,,0.0,,,39.0,32.0,,21.0,0.0,12.0,0.5
25%,20000520.0,2359.0,0.0,,62.0,36.0,,,2.0,,,51.0,49.0,,333.0,0.1,56.0,12.8
50%,20060930.0,2359.0,0.0,,75.0,46.0,,,5.0,,,61.0,58.0,,506.0,0.19,71.9,18.6
75%,20130210.0,2359.0,0.0,,89.0,56.0,,,7.0,,,70.0,66.0,,652.0,0.27,85.6,30.0
max,20190620.0,2359.0,3.55,,112.0,84.0,,,18.0,,,79.0,75.0,,790.0,0.41,100.0,99.2


In [22]:
# Summary statistics for the Bakersfield station file.
# In this extract only Date, Time, Precip, Air max and min hold values;
# `speed` has a count of 0, so this station contributes no wind readings.
Bakersfield_Weather_Data.describe()

Unnamed: 0,Date,Time,Precip,type,Air max,min,obs,Wx,Wind dir,speed,Bulb wet,dry,Soil max,min.1,Evap,Solar,ETo,RH max,min.2
count,3462.0,3462.0,3462.0,0.0,3462.0,3462.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,20143180.0,2362.63576,0.016407,,77.170999,49.659445,,,,,,,,,,,,,
std,27416.11,11.657043,0.075605,,14.827573,11.821722,,,,,,,,,,,,,
min,20100100.0,2359.0,0.0,,33.0,17.0,,,,,,,,,,,,,
25%,20120520.0,2359.0,0.0,,64.0,41.0,,,,,,,,,,,,,
50%,20140930.0,2359.0,0.0,,77.0,49.0,,,,,,,,,,,,,
75%,20170210.0,2359.0,0.0,,91.0,59.0,,,,,,,,,,,,,
max,20190620.0,2400.0,1.53,,107.0,80.0,,,,,,,,,,,,,


In [23]:
# Compare timestamps between the DSN Goldstone, CA complex and the California weather stations.
# Pay attention to the mph vs kph conversions needed.


In [32]:
# Reduce each station frame to its station label, date, time and wind speed.
# The Palmdale file's station header carries a stray trailing double quote
# (it is rendered as "Station""" in the displayed table), so that label is
# spelled differently from the other two.
Bakersfield_wind_data = Bakersfield_Weather_Data.loc[:, ['Station', 'Date', 'Time', 'speed']]
Victorville_wind_data = Victorville_Weather_Data.loc[:, ['Station', 'Date', 'Time', 'speed']]
Pamdale_wind_data = Pamdale_Weather_Data.loc[:, ['Station"', 'Date', 'Time', 'speed']]


In [33]:
# Palmdale row for 2013-04-04 (Date is stored as a YYYYMMDD number);
# this date matches the DSN sample.
Pamdale_wind_data[Pamdale_wind_data['Date'].eq(20130404)]

Unnamed: 0,"Station""",Date,Time,speed
3015,Palmdale.A,20130404.0,2359.0,9.0


In [34]:
# Bakersfield row for 2013-04-04 (Date is stored as a YYYYMMDD number).
Bakersfield_wind_data[Bakersfield_wind_data['Date'].eq(20130404)]

Unnamed: 0,Station,Date,Time,speed
1189,BAKRSFLD.C,20130404.0,2359,


In [35]:
# Victorville row for 2013-04-04 (Date is stored as a YYYYMMDD number).
Victorville_wind_data[Victorville_wind_data['Date'].eq(20130404)]

Unnamed: 0,Station,Date,Time,speed
7033,VICTRVIL.A,20130404.0,2359.0,8.0


In [36]:
# First five DSN rows, limited to the time, update stamp, name_x and wind speed columns.
# Open question: why is the Goldstone wind speed so much higher than the station readings?
# NOTE(review): possibly a units mismatch (mph vs kph) -- confirm which unit the DSN feed reports.
rawdata06252019.loc[:, ['timeUTC', 'updated', 'name_x', 'windSpeed']].head(5)

Unnamed: 0,timeUTC,updated,name_x,windSpeed
0,1365110000000.0,2013-04-04T19:15:27.397Z,DSS14,30.268934
1,1365110000000.0,2013-04-04T16:31:22.353Z,DSS15,30.268934
2,1365110000000.0,2013-04-04T16:31:22.353Z,DSS15,30.268934
3,1365110000000.0,2013-04-04T19:56:37.411Z,DSS27,28.415733
4,1365110000000.0,2013-04-04T19:56:45.448Z,DSS24,21.002933


In [37]:
# Mean of the DSN wind speed column, returned as a one-entry Series labelled windSpeed.
rawdata06252019.loc[:, ['windSpeed']].mean()

windSpeed    20.893511
dtype: float64