# Cloud Condition Prediction

### Importing Libraries

In [212]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings 
warnings.filterwarnings("ignore")

### Creating Dataframe

In [213]:
raw_data= pd.read_csv("train_CloudCondition.csv")
raw_data.shape

(71428, 12)

Data is of 71428 days and 11 parameters describing the weather conditions on that day

In [214]:
raw_data.head()

Unnamed: 0,Day,Cloud_Condition,Rain_OR_SNOW,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars),Condensation,Solar irradiance intensity
0,1,Partly Cloudy,rain,-13,-19.0,0.134364,17.0,68.0,4.0,1008.0,Frost,1068
1,2,Partly Cloudy,rain,15,5.0,0.847434,8.0,291.0,2.0,1036.0,Frost,1291
2,3,Partly Cloudy,rain,33,-12.0,0.763775,32.0,32.0,8.0,1004.0,Dry,1433
3,4,Partly Cloudy,snow,30,36.0,0.255069,15.0,130.0,3.0,1016.0,Dry,1410
4,5,Partly Cloudy,snow,27,30.0,0.495435,63.0,60.0,15.0,1007.0,Fog,1391


In [215]:
raw_data.tail()

Unnamed: 0,Day,Cloud_Condition,Rain_OR_SNOW,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars),Condensation,Solar irradiance intensity
71423,79996,Foggy,rain,39,31.0,0.243553,19.0,347.0,14.0,1013.0,Frost,1269
71424,79997,Foggy,rain,8,4.0,0.913108,1.0,101.0,8.0,1031.0,Dry,1224
71425,79998,Mostly Cloudy,rain,28,-22.0,0.496076,2.0,149.0,7.0,1032.0,Frost,1463
71426,79999,Mostly Cloudy,rain,-16,-3.0,0.783161,44.0,266.0,11.0,1019.0,Fog,1251
71427,80000,Mostly Cloudy,rain,-15,8.0,0.191555,38.0,154.0,6.0,1023.0,Fog,1258


In [216]:
raw_data.columns

Index(['Day', 'Cloud_Condition', 'Rain_OR_SNOW', 'Temperature (C)',
       'Apparent Temperature (C)', 'Humidity', 'Wind Speed (km/h)',
       'Wind Bearing (degrees)', 'Visibility (km)', 'Pressure (millibars)',
       'Condensation', 'Solar irradiance intensity'],
      dtype='object')

In [217]:
raw_data.dtypes

Day                             int64
Cloud_Condition                object
Rain_OR_SNOW                   object
Temperature (C)                object
Apparent Temperature (C)      float64
Humidity                      float64
Wind Speed (km/h)             float64
Wind Bearing (degrees)        float64
Visibility (km)               float64
Pressure (millibars)          float64
Condensation                   object
Solar irradiance intensity      int64
dtype: object

**Data Description**

1. Day variable specifies the day of weather conditions, however it is not continous(Not all days are mentioned). Hence, for now lets start by dropping the Day variable, we might later do changes

2. Cloud_condition is the independent (Y) variable. It is a discrete variable, hence we conclude that we have labelled data to go forward with supervised form of learning(Classification)

3. 'Rain_OR_SNOW', 'Temperature (C)', 'Apparent Temperature (C)', 'Humidity', 'Wind Speed (km/h)', 'Wind Bearing (degrees)', 'Visibility (km)', 'Pressure (millibars)', 'Condensation' and 'Solar irradiance intensity' are dependent(X) variables.

4. Out of the X variables, 'Rain_OR_SNOW' and 'Condensation' are categorical and rest are numerical {Note: 'Temperature (C)' will be converted to int later}



### Data Cleanup and Exploratory Data Analysis

In [218]:
raw_data_dup = raw_data[raw_data.duplicated()]
raw_data_dup

Unnamed: 0,Day,Cloud_Condition,Rain_OR_SNOW,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars),Condensation,Solar irradiance intensity


There are no duplicates

In [219]:
raw_data = raw_data.iloc[:, 1:] 

In [220]:
raw_data.head(2)

Unnamed: 0,Cloud_Condition,Rain_OR_SNOW,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars),Condensation,Solar irradiance intensity
0,Partly Cloudy,rain,-13,-19.0,0.134364,17.0,68.0,4.0,1008.0,Frost,1068
1,Partly Cloudy,rain,15,5.0,0.847434,8.0,291.0,2.0,1036.0,Frost,1291


In [221]:
raw_data.tail(2)

Unnamed: 0,Cloud_Condition,Rain_OR_SNOW,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars),Condensation,Solar irradiance intensity
71426,Mostly Cloudy,rain,-16,-3.0,0.783161,44.0,266.0,11.0,1019.0,Fog,1251
71427,Mostly Cloudy,rain,-15,8.0,0.191555,38.0,154.0,6.0,1023.0,Fog,1258


In [222]:
# raw_data["Temperature (C)"] = raw_data["Temperature (C)"].astype(float)

In [223]:
raw_data.describe()

Unnamed: 0,Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars),Solar irradiance intensity
count,71425.0,71427.0,71426.0,71391.0,71408.0,71363.0,71428.0
mean,5.550606,0.50102,31.525831,179.221793,8.014606,1022.980606,1249.736546
std,19.093822,0.289431,18.507098,103.72921,4.907893,13.559828,144.828721
min,-27.0,1.9e-05,0.0,0.0,0.0,1000.0,1000.0
25%,-11.0,0.249233,16.0,89.0,4.0,1011.0,1124.0
50%,5.0,0.501461,31.0,180.0,8.0,1023.0,1249.0
75%,22.0,0.751978,48.0,269.0,12.0,1035.0,1375.0
max,38.0,0.99999,63.0,359.0,16.0,1046.0,1500.0


1. Seems that there are some missing values in the data 
2. Temperature variable has some string values

In [224]:
raw_data["Temperature (C)"].unique()

array(['-13', '15', '33', '30', '27', '-17', '-5', '-14', '10', '7', '9',
       '20', '3', '29', '-8', '-15', '-20', '36', '32', '6', '17', '28',
       '-21', '23', '-4', '25', '-7', '16', '39', '-1', '13', '35', '22',
       '12', '14', '1', '8', '-3', '38', '5', '37', '-10', '19', '34',
       '26', '0', '24', '11', '21', '-9', '-2', '4', '-19', '-6', '2',
       '-16', '-11', '18', '31', '-18', '-12', nan, '-', 16, 30, 17, -13,
       36, 4, 9, 10, 13, 27, -8, -16, 34, 2, -11, -9, -10, -5, 39, 14, 7,
       20, -3, 15, 24, 35, 25, -12, 26, -14, 19, 5, 38, -4, -17, 0, 8, 21,
       -20, -21, 37, 22, 11, 28, 12, -18, 29, 1, -2, -7, -6, 33, 3, 32,
       -1, 6, -15, 18, 23, 31, -19], dtype=object)

In [225]:
raw_data["Temperature (C)"].replace('-', np.nan, inplace = True)
raw_data["Temperature (C)"] = raw_data["Temperature (C)"].astype(float)

In [226]:
raw_data["Temperature (C)"].unique()

array([-13.,  15.,  33.,  30.,  27., -17.,  -5., -14.,  10.,   7.,   9.,
        20.,   3.,  29.,  -8., -15., -20.,  36.,  32.,   6.,  17.,  28.,
       -21.,  23.,  -4.,  25.,  -7.,  16.,  39.,  -1.,  13.,  35.,  22.,
        12.,  14.,   1.,   8.,  -3.,  38.,   5.,  37., -10.,  19.,  34.,
        26.,   0.,  24.,  11.,  21.,  -9.,  -2.,   4., -19.,  -6.,   2.,
       -16., -11.,  18.,  31., -18., -12.,  nan])

In [227]:
raw_data.isnull().sum()

Cloud_Condition                 0
Rain_OR_SNOW                  115
Temperature (C)               253
Apparent Temperature (C)        3
Humidity                        1
Wind Speed (km/h)               2
Wind Bearing (degrees)         37
Visibility (km)                20
Pressure (millibars)           65
Condensation                    0
Solar irradiance intensity      0
dtype: int64

In [228]:
raw_data = raw_data.dropna()

In [229]:
raw_data.shape

(70938, 11)

In [230]:
#List Of categorical Columns and Numerical Columns(Except target variable)
cat=[]
num=[]
for x in raw_data.columns[1:]:
    if raw_data[x].dtype=="object":
        cat.append(x)
    else:
        num.append(x)

In [231]:
from sklearn import preprocessing

le = preprocessing.LabelEncoder()

for x in cat:
    raw_data[x]=le.fit_transform(raw_data[x])

In [232]:
raw_data.head()

Unnamed: 0,Cloud_Condition,Rain_OR_SNOW,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars),Condensation,Solar irradiance intensity
0,Partly Cloudy,0,-13.0,-19.0,0.134364,17.0,68.0,4.0,1008.0,2,1068
1,Partly Cloudy,0,15.0,5.0,0.847434,8.0,291.0,2.0,1036.0,2,1291
2,Partly Cloudy,0,33.0,-12.0,0.763775,32.0,32.0,8.0,1004.0,0,1433
3,Partly Cloudy,1,30.0,36.0,0.255069,15.0,130.0,3.0,1016.0,0,1410
4,Partly Cloudy,1,27.0,30.0,0.495435,63.0,60.0,15.0,1007.0,1,1391


In [233]:
raw_data.dtypes

Cloud_Condition                object
Rain_OR_SNOW                    int32
Temperature (C)               float64
Apparent Temperature (C)      float64
Humidity                      float64
Wind Speed (km/h)             float64
Wind Bearing (degrees)        float64
Visibility (km)               float64
Pressure (millibars)          float64
Condensation                    int32
Solar irradiance intensity      int64
dtype: object

In [234]:
raw_data["Cloud_Condition"].value_counts()

Mostly Cloudy                          21860
Partly Cloudy                          17526
Overcast                               13506
Clear                                   9648
Foggy                                   5871
Breezy and Dry                           630
Breezy and Mostly Cloudy                 467
Breezy and Overcast                      452
Breezy and Partly Cloudy                 350
Light Rain                               209
Dry and Partly Cloudy                     86
Windy and Partly Cloudy                   63
Breezy                                    45
Windy and Overcast                        42
Dry                                       34
Breezy and Foggy                          34
Humid and Mostly Cloudy                   32
Windy and Mostly Cloudy                   32
Humid and Partly Cloudy                   17
Dry and Mostly Cloudy                     14
Humid and Overcast                         5
Windy                                      5
Drizzle   

In [235]:
# raw_data['Cloud_Condition'].replace("Breezy","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Breezy and Dry","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Breezy and Foggy","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Breezy and Mostly Cloudy","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Breezy and Overcast","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Breezy and Partly Cloudy","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Clear","Clear",inplace = True)
# raw_data['Cloud_Condition'].replace("Dangerously Windy and Partly Cloudy","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Drizzle","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Dry","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Dry and Mostly Cloudy","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Dry and Partly Cloudy","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Foggy","Foggy",inplace = True)
# raw_data['Cloud_Condition'].replace("Humid and Mostly Cloudy","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Humid and Overcast","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Humid and Partly Cloudy","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Light Rain","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Mostly Cloudy","Mostly Cloudy",inplace = True)
# raw_data['Cloud_Condition'].replace("Overcast","Overcast",inplace = True)
# raw_data['Cloud_Condition'].replace("Partly Cloudy","Partly Cloudy",inplace = True)
# raw_data['Cloud_Condition'].replace("Windy","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Windy and Dry","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Windy and Foggy","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Windy and Mostly Cloudy","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Windy and Overcast","Unknown",inplace = True)
# raw_data['Cloud_Condition'].replace("Windy and Partly Cloudy","Unknown",inplace = True)

In [236]:
working_data = raw_data

In [237]:
working_data["Cloud_Condition"].value_counts()

Mostly Cloudy                          21860
Partly Cloudy                          17526
Overcast                               13506
Clear                                   9648
Foggy                                   5871
Breezy and Dry                           630
Breezy and Mostly Cloudy                 467
Breezy and Overcast                      452
Breezy and Partly Cloudy                 350
Light Rain                               209
Dry and Partly Cloudy                     86
Windy and Partly Cloudy                   63
Breezy                                    45
Windy and Overcast                        42
Dry                                       34
Breezy and Foggy                          34
Humid and Mostly Cloudy                   32
Windy and Mostly Cloudy                   32
Humid and Partly Cloudy                   17
Dry and Mostly Cloudy                     14
Humid and Overcast                         5
Windy                                      5
Drizzle   

In [238]:
# from sklearn import preprocessing
# Xg = ["Cloud_Condition"]
# le = preprocessing.LabelEncoder()

# for x in Xg:
#     raw_data[x]=le.fit_transform(raw_data[x])

In [239]:
working_data

Unnamed: 0,Cloud_Condition,Rain_OR_SNOW,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars),Condensation,Solar irradiance intensity
0,Partly Cloudy,0,-13.0,-19.0,0.134364,17.0,68.0,4.0,1008.0,2,1068
1,Partly Cloudy,0,15.0,5.0,0.847434,8.0,291.0,2.0,1036.0,2,1291
2,Partly Cloudy,0,33.0,-12.0,0.763775,32.0,32.0,8.0,1004.0,0,1433
3,Partly Cloudy,1,30.0,36.0,0.255069,15.0,130.0,3.0,1016.0,0,1410
4,Partly Cloudy,1,27.0,30.0,0.495435,63.0,60.0,15.0,1007.0,1,1391
...,...,...,...,...,...,...,...,...,...,...,...
71423,Foggy,0,39.0,31.0,0.243553,19.0,347.0,14.0,1013.0,2,1269
71424,Foggy,0,8.0,4.0,0.913108,1.0,101.0,8.0,1031.0,0,1224
71425,Mostly Cloudy,0,28.0,-22.0,0.496076,2.0,149.0,7.0,1032.0,2,1463
71426,Mostly Cloudy,0,-16.0,-3.0,0.783161,44.0,266.0,11.0,1019.0,1,1251


In [240]:
X = working_data.values[:, 1:]
Y = working_data.values[:, 0]

In [249]:
from sklearn.model_selection import train_test_split

#Split the data into test and train
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=10)

In [250]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

scaler.fit(X_train)
#scaler.fit_transform(X_train)
X_train = scaler.transform(X_train)
X_test=scaler.transform(X_test)

In [251]:
X.shape

(70938, 10)

In [261]:
Y.shape

(70938,)

In [263]:
# # import SMOTE from imblearn library
# from imblearn.over_sampling import SMOTE
# sm = SMOTE()
# X_train_res, Y_train_res = sm.fit_resample(X, Y)

### Random Forest

In [266]:
#predicting using the Decision_Tree_Classifier
from sklearn.ensemble import RandomForestClassifier

model= RandomForestClassifier(random_state=10,n_estimators=86,criterion="entropy",min_samples_leaf=100,
                                    min_samples_split=80,bootstrap=False,max_depth=10)

#fit the model on the data and predict the values
model.fit(X_train,Y_train)

Y_pred = model.predict(X_test)

In [268]:
# from sklearn.metrics import confusion_matrix, accuracy_score, \
classification_report

cfm=confusion_matrix(Y_test,Y_pred)
print(cfm)

print("Classification report: ")

print(classification_report(Y_test,Y_pred))

acc=accuracy_score(Y_test, Y_pred)
print("Accuracy of the model: ",acc)

[[   0    0    0    0    0    0    0    0    0    0    2    0    0    0
     0   11    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    8    0    0    0
     0  124    2    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    3    0    0    0
     0    3    1    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    6    0    0    0
     0   96    2    1    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    6    0    0    0
     0   78    2    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    1    0    0    0
     0   56    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0  175    0    0    0
     0 1708   47    2    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    2    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    

In [299]:
#predicting using the KNeighbors_classifier
from sklearn.neighbors import KNeighborsClassifier
model_KNN = KNeighborsClassifier(n_neighbors=50, metric= "minkowski")
#euclidean,manhattan,minkowski
#fit the model on the data and predict the values

model_KNN.fit(X_train,Y_train)
Y_pred=model_KNN.predict(X_test)
# print(list(zip(Y_test,Y_pred)))


In [300]:
from sklearn.metrics import confusion_matrix, accuracy_score, \
classification_report

cfm=confusion_matrix(Y_test,Y_pred)
print(cfm)

print("Classification report: ")

print(classification_report(Y_test,Y_pred))

acc=accuracy_score(Y_test, Y_pred)
print("Accuracy of the model: ",acc)

[[   0    0    0    0    0    0    0    0    0    0    2    0    0    0
     0    9    0    2    0    0    0    0    0]
 [   0    0    0    0    0    0    2    0    0    0    8    0    0    0
     0   92    3   29    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    3    0    0    0
     0    2    1    1    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    6    0    0    0
     0   74    3   22    0    0    0    0    0]
 [   0    0    0    0    0    0    1    0    0    0    6    0    0    0
     0   52    5   22    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    1    0    0    0
     0   44    0   12    0    0    0    0    0]
 [   0    0    0    0    0    0   11    0    0    0  146    0    0    0
     0 1262  105  408    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    2    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    

In [306]:
#predicting using the Extra_Tree_Classifier
from sklearn.ensemble import ExtraTreesClassifier

model= ExtraTreesClassifier(n_estimators=100,
                                          random_state=10)

#fit the model on the data and predict the values
model.fit(X_train,Y_train)

Y_pred = model.predict(X_test)

In [307]:
from sklearn.metrics import confusion_matrix, accuracy_score, \
classification_report

cfm=confusion_matrix(Y_test,Y_pred)
print(cfm)

print("Classification report: ")

print(classification_report(Y_test,Y_pred))

acc=accuracy_score(Y_test, Y_pred)
print("Accuracy of the model: ",acc)

[[   0    0    0    0    0    0    1    0    0    0    2    0    0    0
     0    5    0    5    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    8    0    0    0
     0   80    5   41    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    1    0    0    0
     0    2    2    2    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    5    0    0    0
     0   63   10   27    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    6    0    0    0
     0   57    5   18    0    0    0    0    0]
 [   0    0    0    0    0    0    1    0    0    0    1    0    0    0
     0   43    3    9    0    0    0    0    0]
 [   0    0    0    0    0    0   25    0    0    0  113    0    0    0
     0 1171  161  462    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    1    0    1    0    0    0    0    0]
 [   0    0    0    0    0    0    0    

In [269]:
Test_data = pd.read_csv("test_CloudCondition.csv")
Test_data.shape

(16452, 11)

In [270]:
Test_data.head()

Unnamed: 0,Day,Rain_OR_SNOW,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars),Condensation,Solar irradiance intensity
0,80002,rain,1.0,0.0,0.07815,17.0,300,10.0,1029.0,Fog,1399
1,80003,rain,-3.0,25.0,0.653251,25.0,322,0.0,1009.0,Dry,1468
2,80004,rain,18.0,17.0,0.93097,37.0,330,12.0,1045.0,Dry,1152
3,80005,rain,33.0,-12.0,0.778844,50.0,350,3.0,1022.0,Fog,1003
4,80006,rain,25.0,-19.0,0.491523,62.0,300,12.0,1042.0,Fog,1228


In [271]:
Test_data.isnull().sum()

Day                           0
Rain_OR_SNOW                  0
Temperature (C)               0
Apparent Temperature (C)      0
Humidity                      0
Wind Speed (km/h)             0
Wind Bearing (degrees)        0
Visibility (km)               0
Pressure (millibars)          0
Condensation                  0
Solar irradiance intensity    0
dtype: int64

In [272]:
Test_data.dtypes

Day                             int64
Rain_OR_SNOW                   object
Temperature (C)               float64
Apparent Temperature (C)      float64
Humidity                      float64
Wind Speed (km/h)             float64
Wind Bearing (degrees)          int64
Visibility (km)               float64
Pressure (millibars)          float64
Condensation                   object
Solar irradiance intensity      int64
dtype: object

In [273]:
#List Of categorical Columns and Numerical Columns(Except target variable)
cat=[]
num=[]
for x in Test_data.columns[:]:
    if Test_data[x].dtype=="object":
        cat.append(x)
    else:
        num.append(x)

In [274]:
from sklearn import preprocessing

le = preprocessing.LabelEncoder()

for x in cat:
    Test_data[x]=le.fit_transform(Test_data[x])

In [275]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

scaler.fit(X_train)
#scaler.fit_transform(X_train)
# X_train = scaler.transform(X_train)
Test=scaler.transform(Test_data.iloc[:,1:])

In [276]:
Test

array([[-2.85007456e-16,  1.00000000e+00, -1.11775298e-15, ...,
         1.02900000e+03,  1.00000000e+00,  1.39900000e+03],
       [-2.85007456e-16, -3.00000000e+00,  2.50000000e+01, ...,
         1.00900000e+03, -2.88521043e-16,  1.46800000e+03],
       [-2.85007456e-16,  1.80000000e+01,  1.70000000e+01, ...,
         1.04500000e+03, -2.88521043e-16,  1.15200000e+03],
       ...,
       [-2.85007456e-16,  2.20388889e+01,  4.40166667e+01, ...,
         1.01566000e+03,  3.00000000e+00,  1.00800000e+03],
       [-2.85007456e-16,  2.15222222e+01,  4.57722222e+01, ...,
         1.01595000e+03,  1.00000000e+00,  1.36900000e+03],
       [-2.85007456e-16,  2.04388889e+01,  4.75277778e+01, ...,
         1.01616000e+03, -2.88521043e-16,  1.24400000e+03]])

In [277]:
# Test = Test_data.values[:,1:]

In [312]:
# Y_pred=model_GradientBoost.predict(Test)
# Y_pred = model_DecisionTree.predict(Test)
# Y_pred_new=svc_model.predict(Test)
# Y_pred=model_KNN.predict(Test)
Y_pred = model.predict(Test)
# Y_pred = model.predict(Test)




In [313]:
# Y_pred= pd.DataFrame(Y_pred)

In [314]:
# Y_pred = le.inverse_transform(Y_pred)

In [315]:
Sol = pd.DataFrame({ "Day":Test_data.Day, 'Cloud_Condition': Y_pred })

In [316]:
Sol.to_csv("KNNSol12.csv", index= False)