In [0]:
!pip install --upgrade --quiet gspread

In [0]:
from google.colab import auth
auth.authenticate_user()
import gspread

from oauth2client.client import GoogleCredentials
gc = gspread.authorize(GoogleCredentials.get_application_default())

import pandas as pd
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.feature_selection import RFECV
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import log_loss, confusion_matrix, classification_report
import numpy as np


In [0]:
worksheet = gc.open('Water by ETo').sheet1

# get_all_values gives a list of rows.
rows = worksheet.get_all_values()

In [4]:
df = pd.DataFrame.from_records(rows)
new_header = df.iloc[0]
df = df[1:]
df.columns = new_header
df.head()

Unnamed: 0,Type,Botanical Name,Common Name,"1, 2, 3, 4, 6, 8","12, 14, 15, 16","1, 2, 4, 6",9,"14, 17",18
1,S,Abelia chinensis,Chinese abelia,Moderate/Medium,Unknown,Unknown,Moderate/Medium,Inappropriate,Inappropriate
2,S,Abelia floribunda,Mexican abelia,Moderate/Medium,Unknown,Moderate/Medium,Moderate/Medium,Inappropriate,Inappropriate
3,S,Abelia mosanensis 'Fragrant Abelia',fragrant abelia,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown
4,S,Abelia parvifolia (A. longituba),Schuman abelia,Unknown,Unknown,Unknown,Moderate/Medium,Unknown,Unknown
5,Gc S,Abelia x grandiflora and cvs.,glossy abelia,Moderate/Medium,Moderate/Medium,Moderate/Medium,Moderate/Medium,Moderate/Medium,Inappropriate


In [0]:
plant_df = df[["Type", "Botanical Name", "Common Name", "1, 2, 3, 4, 6, 8"]]

In [6]:
plant_df.head()

Unnamed: 0,Type,Botanical Name,Common Name,"1, 2, 3, 4, 6, 8"
1,S,Abelia chinensis,Chinese abelia,Moderate/Medium
2,S,Abelia floribunda,Mexican abelia,Moderate/Medium
3,S,Abelia mosanensis 'Fragrant Abelia',fragrant abelia,Unknown
4,S,Abelia parvifolia (A. longituba),Schuman abelia,Unknown
5,Gc S,Abelia x grandiflora and cvs.,glossy abelia,Moderate/Medium


In [7]:
df_list = []
for eto in [1, 2, 3, 4, 6, 8]:
    tmp_df = plant_df.copy()
    tmp_df["Eto"] = eto
    df_list.append(tmp_df)
final_plant_df1 = pd.concat(df_list)
final_plant_df1.columns = ["Type", "Botanical Name", "Common Name", "Water", "Eto"]
final_plant_df1.head()

Unnamed: 0,Type,Botanical Name,Common Name,Water,Eto
1,S,Abelia chinensis,Chinese abelia,Moderate/Medium,1
2,S,Abelia floribunda,Mexican abelia,Moderate/Medium,1
3,S,Abelia mosanensis 'Fragrant Abelia',fragrant abelia,Unknown,1
4,S,Abelia parvifolia (A. longituba),Schuman abelia,Unknown,1
5,Gc S,Abelia x grandiflora and cvs.,glossy abelia,Moderate/Medium,1


In [8]:
final_plant_df1.groupby("Eto").count()

Unnamed: 0_level_0,Type,Botanical Name,Common Name,Water
Eto,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,3543,3543,3543,3543
2,3543,3543,3543,3543
3,3543,3543,3543,3543
4,3543,3543,3543,3543
6,3543,3543,3543,3543
8,3543,3543,3543,3543


In [9]:
plant_df = df[["Type", "Botanical Name", "Common Name", "12, 14, 15, 16"]]
df_list = []
for eto in [12, 14, 15, 16]:
    tmp_df = plant_df.copy()
    tmp_df["Eto"] = eto
    df_list.append(tmp_df)
final_plant_df2 = pd.concat(df_list)
final_plant_df2.columns = ["Type", "Botanical Name", "Common Name", "Water", "Eto"]
final_plant_df2.head()

Unnamed: 0,Type,Botanical Name,Common Name,Water,Eto
1,S,Abelia chinensis,Chinese abelia,Unknown,12
2,S,Abelia floribunda,Mexican abelia,Unknown,12
3,S,Abelia mosanensis 'Fragrant Abelia',fragrant abelia,Unknown,12
4,S,Abelia parvifolia (A. longituba),Schuman abelia,Unknown,12
5,Gc S,Abelia x grandiflora and cvs.,glossy abelia,Moderate/Medium,12


In [10]:
plant_df = df[["Type", "Botanical Name", "Common Name", "1, 2, 4, 6"]]
df_list = []
for eto in [1, 2, 4, 6]:
    tmp_df = plant_df.copy()
    tmp_df["Eto"] = eto
    df_list.append(tmp_df)
final_plant_df3 = pd.concat(df_list)
final_plant_df3.columns = ["Type", "Botanical Name", "Common Name", "Water", "Eto"]
final_plant_df3.head()

Unnamed: 0,Type,Botanical Name,Common Name,Water,Eto
1,S,Abelia chinensis,Chinese abelia,Unknown,1
2,S,Abelia floribunda,Mexican abelia,Moderate/Medium,1
3,S,Abelia mosanensis 'Fragrant Abelia',fragrant abelia,Unknown,1
4,S,Abelia parvifolia (A. longituba),Schuman abelia,Unknown,1
5,Gc S,Abelia x grandiflora and cvs.,glossy abelia,Moderate/Medium,1


In [11]:
plant_df = df[["Type", "Botanical Name", "Common Name", "9"]]
df_list = []
for eto in [9]:
    tmp_df = plant_df.copy()
    tmp_df["Eto"] = eto
    df_list.append(tmp_df)
final_plant_df4 = pd.concat(df_list)
final_plant_df4.columns = ["Type", "Botanical Name", "Common Name", "Water", "Eto"]
final_plant_df4.head()

Unnamed: 0,Type,Botanical Name,Common Name,Water,Eto
1,S,Abelia chinensis,Chinese abelia,Moderate/Medium,9
2,S,Abelia floribunda,Mexican abelia,Moderate/Medium,9
3,S,Abelia mosanensis 'Fragrant Abelia',fragrant abelia,Unknown,9
4,S,Abelia parvifolia (A. longituba),Schuman abelia,Moderate/Medium,9
5,Gc S,Abelia x grandiflora and cvs.,glossy abelia,Moderate/Medium,9


In [12]:
plant_df = df[["Type", "Botanical Name", "Common Name", "14, 17"]]
df_list = []
for eto in [14, 17]:
    tmp_df = plant_df.copy()
    tmp_df["Eto"] = eto
    df_list.append(tmp_df)
final_plant_df5 = pd.concat(df_list)
final_plant_df5.columns = ["Type", "Botanical Name", "Common Name", "Water", "Eto"]
final_plant_df5.head()

Unnamed: 0,Type,Botanical Name,Common Name,Water,Eto
1,S,Abelia chinensis,Chinese abelia,Inappropriate,14
2,S,Abelia floribunda,Mexican abelia,Inappropriate,14
3,S,Abelia mosanensis 'Fragrant Abelia',fragrant abelia,Unknown,14
4,S,Abelia parvifolia (A. longituba),Schuman abelia,Unknown,14
5,Gc S,Abelia x grandiflora and cvs.,glossy abelia,Moderate/Medium,14


In [13]:
plant_df = df[["Type", "Botanical Name", "Common Name", "18"]]
df_list = []
for eto in [18]:
    tmp_df = plant_df.copy()
    tmp_df["Eto"] = eto
    df_list.append(tmp_df)
final_plant_df6 = pd.concat(df_list)
final_plant_df6.columns = ["Type", "Botanical Name", "Common Name", "Water", "Eto"]
final_plant_df6.head()

Unnamed: 0,Type,Botanical Name,Common Name,Water,Eto
1,S,Abelia chinensis,Chinese abelia,Inappropriate,18
2,S,Abelia floribunda,Mexican abelia,Inappropriate,18
3,S,Abelia mosanensis 'Fragrant Abelia',fragrant abelia,Unknown,18
4,S,Abelia parvifolia (A. longituba),Schuman abelia,Unknown,18
5,Gc S,Abelia x grandiflora and cvs.,glossy abelia,Inappropriate,18


In [14]:
final_plant_df = pd.concat([final_plant_df1, final_plant_df2, final_plant_df3, final_plant_df4, final_plant_df5, final_plant_df6])
final_plant_df.head(15)

Unnamed: 0,Type,Botanical Name,Common Name,Water,Eto
1,S,Abelia chinensis,Chinese abelia,Moderate/Medium,1
2,S,Abelia floribunda,Mexican abelia,Moderate/Medium,1
3,S,Abelia mosanensis 'Fragrant Abelia',fragrant abelia,Unknown,1
4,S,Abelia parvifolia (A. longituba),Schuman abelia,Unknown,1
5,Gc S,Abelia x grandiflora and cvs.,glossy abelia,Moderate/Medium,1
6,S,Abeliophyllum distichum,forsythia,Moderate/Medium,1
7,S,Abelmoschus manihot (Hibiscus manihot),sunset muskmallow,Unknown,1
8,T,Abies pinsapo,Spanish fir,Low,1
9,T N,Abies spp. (CA native and non-native),fir,Moderate/Medium,1
10,P N,Abronia latifolia,yellow sand verbena,Very Low,1


In [15]:
final_plant_df.count()

Type              63774
Botanical Name    63774
Common Name       63774
Water             63774
Eto               63774
dtype: int64

In [0]:
p_df = pd.concat([final_plant_df, final_plant_df['Type'].str.get_dummies(sep=' ')], 1)

In [17]:
p_df.groupby("Water").count()

Unnamed: 0_level_0,Type,Botanical Name,Common Name,Eto,A,Ba,Bu,G,GC,Gc,N,P,Pm,S,Su,T,V
Water,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93
High,1853,1853,1853,1853,1853,1853,1853,1853,1853,1853,1853,1853,1853,1853,1853,1853,1853
Inappropriate,6110,6110,6110,6110,6110,6110,6110,6110,6110,6110,6110,6110,6110,6110,6110,6110,6110
Low,13971,13971,13971,13971,13971,13971,13971,13971,13971,13971,13971,13971,13971,13971,13971,13971,13971
Moderate/Medium,19614,19614,19614,19614,19614,19614,19614,19614,19614,19614,19614,19614,19614,19614,19614,19614,19614
Unknown,18940,18940,18940,18940,18940,18940,18940,18940,18940,18940,18940,18940,18940,18940,18940,18940,18940
Very Low,3193,3193,3193,3193,3193,3193,3193,3193,3193,3193,3193,3193,3193,3193,3193,3193,3193


In [0]:
to_replace = {
    "Very Low": 1,
    "Low": 2,
    "Moderate/Medium": 3,
    "High": 4 
}
pw_df = p_df.replace(to_replace)

In [0]:
pw_df['validation'] = pw_df.apply(lambda row: 1 if (row["Water"] in ["Unknown", "", None, "Inappropriate"] ) else 0, axis=1)

In [20]:
pw_df.head()

Unnamed: 0,Type,Botanical Name,Common Name,Water,Eto,A,Ba,Bu,G,GC,Gc,N,P,Pm,S,Su,T,V,validation
1,S,Abelia chinensis,Chinese abelia,3,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2,S,Abelia floribunda,Mexican abelia,3,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,S,Abelia mosanensis 'Fragrant Abelia',fragrant abelia,Unknown,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1
4,S,Abelia parvifolia (A. longituba),Schuman abelia,Unknown,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1
5,Gc S,Abelia x grandiflora and cvs.,glossy abelia,3,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0


In [0]:
validation = pw_df[pw_df.validation == 1][['Eto', 'A', 'Ba', 'Bu', 'G', 'GC', 'Gc', 'N', 'P', 'Pm', 'S', 'Su', 'T', 'V']]

In [22]:
validation = validation.astype('int32')
validation.head()

Unnamed: 0,Eto,A,Ba,Bu,G,GC,Gc,N,P,Pm,S,Su,T,V
3,1,0,0,0,0,0,0,0,0,0,1,0,0,0
4,1,0,0,0,0,0,0,0,0,0,1,0,0,0
7,1,0,0,0,0,0,0,0,0,0,1,0,0,0
16,1,0,0,0,0,0,0,0,0,0,1,0,1,0
17,1,0,0,0,0,0,0,0,0,0,1,0,0,0


In [23]:
validation.groupby("Eto").count()

Unnamed: 0_level_0,A,Ba,Bu,G,GC,Gc,N,P,Pm,S,Su,T,V
Eto,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667
2,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667
3,818,818,818,818,818,818,818,818,818,818,818,818,818
4,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667
6,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667,1667
8,818,818,818,818,818,818,818,818,818,818,818,818,818
9,1424,1424,1424,1424,1424,1424,1424,1424,1424,1424,1424,1424,1424
12,1691,1691,1691,1691,1691,1691,1691,1691,1691,1691,1691,1691,1691
14,4620,4620,4620,4620,4620,4620,4620,4620,4620,4620,4620,4620,4620
15,1691,1691,1691,1691,1691,1691,1691,1691,1691,1691,1691,1691,1691


In [0]:
train_test = pw_df[pw_df.validation == 0][['Eto', 'A', 'Ba', 'Bu', 'G', 'GC', 'Gc', 'N', 'P', 'Pm', 'S', 'Su', 'T', 'V', "Water"]]

In [25]:
train_test = train_test.astype('int32')
train_test.head()

Unnamed: 0,Eto,A,Ba,Bu,G,GC,Gc,N,P,Pm,S,Su,T,V,Water
1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,3
2,1,0,0,0,0,0,0,0,0,0,1,0,0,0,3
5,1,0,0,0,0,0,1,0,0,0,1,0,0,0,3
6,1,0,0,0,0,0,0,0,0,0,1,0,0,0,3
8,1,0,0,0,0,0,0,0,0,0,0,0,1,0,2


In [26]:
train_test.groupby("Eto").count()

Unnamed: 0_level_0,A,Ba,Bu,G,GC,Gc,N,P,Pm,S,Su,T,V,Water
Eto,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419
2,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419
3,2725,2725,2725,2725,2725,2725,2725,2725,2725,2725,2725,2725,2725,2725
4,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419
6,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419,5419
8,2725,2725,2725,2725,2725,2725,2725,2725,2725,2725,2725,2725,2725,2725
9,2119,2119,2119,2119,2119,2119,2119,2119,2119,2119,2119,2119,2119,2119
12,1852,1852,1852,1852,1852,1852,1852,1852,1852,1852,1852,1852,1852,1852
14,2466,2466,2466,2466,2466,2466,2466,2466,2466,2466,2466,2466,2466,2466
15,1852,1852,1852,1852,1852,1852,1852,1852,1852,1852,1852,1852,1852,1852


In [27]:
train_test.groupby("Water").count()

Unnamed: 0_level_0,Eto,A,Ba,Bu,G,GC,Gc,N,P,Pm,S,Su,T,V
Water,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,3193,3193,3193,3193,3193,3193,3193,3193,3193,3193,3193,3193,3193,3193
2,13971,13971,13971,13971,13971,13971,13971,13971,13971,13971,13971,13971,13971,13971
3,19614,19614,19614,19614,19614,19614,19614,19614,19614,19614,19614,19614,19614,19614
4,1853,1853,1853,1853,1853,1853,1853,1853,1853,1853,1853,1853,1853,1853


In [0]:
x = train_test[['Eto', 'A', 'Ba', 'Bu', 'G', 'GC', 'Gc', 'N', 'P', 'Pm', 'S', 'Su', 'T', 'V']]
y = train_test[["Water"]]

In [0]:
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=42
)

In [30]:
estimator = RandomForestClassifier(n_estimators=100, random_state=42)
model = RFECV(estimator, step=1, cv=5)
model.fit(x, y)
model.get_support()
# estimator = KNeighborsClassifier()

  y = column_or_1d(y, warn=True)


array([False, False, False, False, False, False, False,  True, False,
       False, False, False, False, False])

In [31]:
cv_results = cross_validate(estimator, x, y, cv=5, return_estimator=True)

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)


In [32]:
cv_results

{'estimator': (RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                         max_depth=None, max_features='auto', max_leaf_nodes=None,
                         min_impurity_decrease=0.0, min_impurity_split=None,
                         min_samples_leaf=1, min_samples_split=2,
                         min_weight_fraction_leaf=0.0, n_estimators=100,
                         n_jobs=None, oob_score=False, random_state=42, verbose=0,
                         warm_start=False),
  RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                         max_depth=None, max_features='auto', max_leaf_nodes=None,
                         min_impurity_decrease=0.0, min_impurity_split=None,
                         min_samples_leaf=1, min_samples_split=2,
                         min_weight_fraction_leaf=0.0, n_estimators=100,
                         n_jobs=None, oob_score=False, random_state=42, verbose=0,
                       

In [0]:
model = cv_results["estimator"][4]
#model = model.fit(x_train, y_train)

In [34]:
model.score(x_test, y_test)

0.5907914346223233

In [0]:
predicted = model.predict(x_test)

In [36]:
print(classification_report(y_test, predicted, labels=[1, 2, 3, 4]))

              precision    recall  f1-score   support

           1       0.58      0.20      0.29      1029
           2       0.53      0.35      0.42      4605
           3       0.61      0.88      0.72      6498
           4       1.00      0.00      0.01       617

    accuracy                           0.59     12749
   macro avg       0.68      0.36      0.36     12749
weighted avg       0.60      0.59      0.54     12749



In [37]:
confusion_matrix(y_test, predicted)

array([[ 203,  549,  277,    0],
       [ 114, 1601, 2890,    0],
       [  30,  743, 5725,    0],
       [   3,  143,  468,    3]])

In [38]:
predicted = model.predict_proba(x_test)
log_loss(y_test, predicted, labels=[1, 2, 3, 4])

0.9745320905363936

In [39]:
y_test.groupby("Water").count()

1
2
3
4


In [40]:
y_train.groupby("Water").count()

1
2
3
4


In [0]:
validation["Water"] = model.predict(validation[x.columns])

In [42]:
validation.groupby("Water").count()

Unnamed: 0_level_0,Eto,A,Ba,Bu,G,GC,Gc,N,P,Pm,S,Su,T,V
Water,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,553,553,553,553,553,553,553,553,553,553,553,553,553,553
2,4954,4954,4954,4954,4954,4954,4954,4954,4954,4954,4954,4954,4954,4954
3,19604,19604,19604,19604,19604,19604,19604,19604,19604,19604,19604,19604,19604,19604
4,32,32,32,32,32,32,32,32,32,32,32,32,32,32


In [48]:
validation_complete = pd.concat([pw_df[pw_df.validation == 1][["Type", "Botanical Name", "Common Name", 'Eto',
 'A',
 'Ba',
 'Bu',
 'G',
 'GC',
 'Gc',
 'N',
 'P',
 'Pm',
 'S',
 'Su',
 'T',
 'V', "validation"]], validation["Water"]], axis=1)
validation_complete.head()

Unnamed: 0,Type,Botanical Name,Common Name,Eto,A,Ba,Bu,G,GC,Gc,N,P,Pm,S,Su,T,V,validation,Water
3,S,Abelia mosanensis 'Fragrant Abelia',fragrant abelia,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,3
4,S,Abelia parvifolia (A. longituba),Schuman abelia,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,3
7,S,Abelmoschus manihot (Hibiscus manihot),sunset muskmallow,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,3
16,S T,Acacia abyssinica,Abyssinian acacia,1,0,0,0,0,0,0,0,0,0,1,0,1,0,1,2
17,S,Acacia aneura,mulga,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,3


In [49]:
train_test_complete = pd.concat([pw_df[pw_df.validation == 0][["Type", "Botanical Name", "Common Name", 'Eto',
 'A',
 'Ba',
 'Bu',
 'G',
 'GC',
 'Gc',
 'N',
 'P',
 'Pm',
 'S',
 'Su',
 'T',
 'V', "validation"]], train_test["Water"]], axis=1)
train_test_complete.head()

Unnamed: 0,Type,Botanical Name,Common Name,Eto,A,Ba,Bu,G,GC,Gc,N,P,Pm,S,Su,T,V,validation,Water
1,S,Abelia chinensis,Chinese abelia,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3
2,S,Abelia floribunda,Mexican abelia,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3
5,Gc S,Abelia x grandiflora and cvs.,glossy abelia,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,3
6,S,Abeliophyllum distichum,forsythia,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3
8,T,Abies pinsapo,Spanish fir,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2


In [50]:
df_complete = pd.concat([train_test_complete, validation_complete], axis=0)
df_complete.head()

Unnamed: 0,Type,Botanical Name,Common Name,Eto,A,Ba,Bu,G,GC,Gc,N,P,Pm,S,Su,T,V,validation,Water
1,S,Abelia chinensis,Chinese abelia,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3
2,S,Abelia floribunda,Mexican abelia,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3
5,Gc S,Abelia x grandiflora and cvs.,glossy abelia,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,3
6,S,Abeliophyllum distichum,forsythia,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3
8,T,Abies pinsapo,Spanish fir,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2


In [0]:
df_complete.columns = ["Type", "Botanical Name", "Common Name", 'Eto',
 'A',
 'Ba',
 'Bu',
 'G',
 'GC',
 'Gc',
 'N',
 'P',
 'Pm',
 'S',
 'Su',
 'T',
 'V', "Predicted", "Water"]

In [53]:
df_complete.head()

Unnamed: 0,Type,Botanical Name,Common Name,Eto,A,Ba,Bu,G,GC,Gc,N,P,Pm,S,Su,T,V,Predicted,Water
1,S,Abelia chinensis,Chinese abelia,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3
2,S,Abelia floribunda,Mexican abelia,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3
5,Gc S,Abelia x grandiflora and cvs.,glossy abelia,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,3
6,S,Abeliophyllum distichum,forsythia,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3
8,T,Abies pinsapo,Spanish fir,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2


In [0]:
df_complete.to_csv("plant_dataset_predicted_without_inappropriate.csv", index=False)