In [147]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error,r2_score

In [148]:
data = pd.read_csv('/kaggle/input/arranged-soil-perameters/sensor_1.csv')

In [156]:
data.head()


Unnamed: 0,water_soil,conduct_soil,temp_soil,ill_lx
0,0.0,0,16.22,0.0
1,0.0,0,16.21,20.0
2,29.01,117,14.19,175.0
3,29.47,120,13.97,77.0
4,29.51,120,13.9,21.0


In [157]:
data.shape

(3898, 4)

In [158]:
unique_rows = data[~data.duplicated(keep=False)]
data=pd.concat([data,unique_rows],ignore_index=True)

In [159]:
data.describe()

Unnamed: 0,water_soil,conduct_soil,temp_soil,ill_lx
count,3898.0,3898.0,3898.0,3612.0
mean,6.109284,0.533607,12.380203,3455.223699
std,1.695488,7.841025,1.127553,5560.241264
min,0.0,0.0,10.11,0.0
25%,5.8925,0.0,11.55,0.0
50%,6.13,0.0,12.34,14.5
75%,6.26,0.0,13.03,4915.0
max,31.58,120.0,16.22,26630.0


In [160]:
data['water_soil'].value_counts()

water_soil
6.27    216
6.26    198
6.28    148
6.24    147
6.25    145
       ... 
5.69      2
5.61      2
5.45      2
5.47      2
5.14      2
Name: count, Length: 151, dtype: int64

In [161]:
data.groupby('water_soil').transform('mean')

Unnamed: 0,conduct_soil,temp_soil,ill_lx
0,0.0,16.215000,10.000000
1,0.0,16.215000,10.000000
2,117.0,14.190000,175.000000
3,120.0,13.970000,77.000000
4,120.0,13.900000,21.000000
...,...,...,...
3893,0.0,12.876667,8597.600000
3894,0.0,12.450000,5278.000000
3895,0.0,12.986667,4074.333333
3896,0.0,13.185000,3522.166667


In [162]:
data.isnull().sum()

water_soil        0
conduct_soil      0
temp_soil         0
ill_lx          286
dtype: int64

In [163]:
data['water_soil'].fillna(data['water_soil'].mean(),inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['water_soil'].fillna(data['water_soil'].mean(),inplace=True)


In [164]:
data.isnull().sum()

water_soil        0
conduct_soil      0
temp_soil         0
ill_lx          286
dtype: int64

In [165]:
data.groupby('water_soil').transform('mean')

Unnamed: 0,conduct_soil,temp_soil,ill_lx
0,0.0,16.215000,10.000000
1,0.0,16.215000,10.000000
2,117.0,14.190000,175.000000
3,120.0,13.970000,77.000000
4,120.0,13.900000,21.000000
...,...,...,...
3893,0.0,12.876667,8597.600000
3894,0.0,12.450000,5278.000000
3895,0.0,12.986667,4074.333333
3896,0.0,13.185000,3522.166667


In [166]:
data.fillna(method='ffill',inplace=True)

  data.fillna(method='ffill',inplace=True)


In [167]:
data.isnull().sum()

water_soil      0
conduct_soil    0
temp_soil       0
ill_lx          0
dtype: int64

In [168]:
data.tail()

Unnamed: 0,water_soil,conduct_soil,temp_soil,ill_lx
3893,5.13,0,12.44,9556.0
3894,5.14,0,12.45,5278.0
3895,5.16,0,12.49,6523.0
3896,5.17,0,12.52,3876.0
3897,5.19,0,12.55,3699.0


**PREDICTION 1**

In [169]:
y1 = data['conduct_soil']

In [170]:
train_size = int(len(y1)*0.8)
train,test=y1[:train_size],y1[train_size:]

In [171]:
model_1= ARIMA(train,order=(5,1,0))
model_1=model_1.fit()

In [172]:
train_pred = model_1.predict(steps=0,end=len(train)-1,typ='levels')
train_rmse= np.sqrt(mean_squared_error(train,train_pred))

In [173]:
print("training root mean square error is: ",train_rmse)

training root mean square error is:  4.068109618736801


In [174]:
test_pred=model_1.forecast(steps=len(test))
test_rmse=np.sqrt(mean_squared_error(test,test_pred))

In [175]:
print("testing root mean square error is: ",test_rmse)

testing root mean square error is:  0.0


In [176]:
input_data=[y1[len(y1)-1]]
input_data_series = pd.Series(input_data, index=[len(y1)])

In [177]:
future_data = pd.concat([y1,input_data_series])
future_model_1 = ARIMA(future_data,order=(5,1,0))
future_model_1=future_model_1.fit()

In [178]:
future_pred_1 = future_model_1.forecast(steps=1)

In [179]:
print("prediction is: ",future_pred_1)

prediction is:  3899   -9.881313e-324
dtype: float64


**PREDICTION 2**

In [180]:
y2 = data['temp_soil']

In [181]:
train_size = int(len(y2)*0.8)
train,test=y2[:train_size],y2[train_size:]

In [182]:
model_2= ARIMA(train,order=(5,1,0))
model_2=model_2.fit()

In [183]:
train_pred = model_2.predict(steps=0,end=len(train)-1,typ='levels')
train_rmse= np.sqrt(mean_squared_error(train,train_pred))

In [184]:
print("training root mean square error is: ",train_rmse)

training root mean square error is:  0.31909007691878466


In [185]:
test_pred=model_2.forecast(steps=len(test))
test_rmse=np.sqrt(mean_squared_error(test,test_pred))

In [186]:
print("testing root mean square error is: ",test_rmse)

testing root mean square error is:  1.9701608807782953


In [187]:
input_data=[y2[len(y2)-1]]
input_data_series = pd.Series(input_data, index=[len(y2)])

In [188]:
future_data = pd.concat([y2,input_data_series])
future_model_2 = ARIMA(future_data,order=(5,1,0))
future_model_2=future_model_2.fit()

In [189]:
future_pred_2 = future_model_2.forecast(steps=1)

In [190]:
print("prediction is: ",future_pred_2)

prediction is:  3899    12.561542
dtype: float64


**PREDICTION 3**

In [191]:
y3 = data['ill_lx']

In [192]:
train_size = int(len(y3)*0.8)
train,test=y3[:train_size],y3[train_size:]

In [193]:
model_3= ARIMA(train,order=(5,1,0))
model_3=model_3.fit()

In [194]:
train_pred = model_3.predict(steps=0,end=len(train)-1,typ='levels')
train_rmse= np.sqrt(mean_squared_error(train,train_pred))

In [195]:
print("training root mean square error is: ",train_rmse)

training root mean square error is:  3088.6942975749316


In [196]:
test_pred=model_3.forecast(steps=len(test))
test_rmse=np.sqrt(mean_squared_error(test,test_pred))

In [197]:
print("testing root mean square error is: ",test_rmse)

testing root mean square error is:  5798.580312940036


In [198]:
input_data=[y3[len(y3)-1]]
input_data_series = pd.Series(input_data, index=[len(y3)])

In [199]:
future_data = pd.concat([y3,input_data_series])
future_model_3 = ARIMA(future_data,order=(5,1,0))
future_model_3=future_model_3.fit()

In [200]:
future_pred_3 = future_model_3.forecast(steps=1)

In [201]:
print("prediction is: ",future_pred_3)

prediction is:  3899    4002.67239
dtype: float64


**PREDICTION 4**

In [202]:
y4 = data['water_soil']

In [203]:
train_size = int(len(y4)*0.8)
train,test=y4[:train_size],y4[train_size:]

In [204]:
model_4= ARIMA(train,order=(5,1,0))
model_4=model_4.fit()

In [205]:
train_pred = model_4.predict(steps=0,end=len(train)-1,typ='levels')
train_rmse= np.sqrt(mean_squared_error(train,train_pred))

In [206]:
print("training root mean square error is: ",train_rmse)

training root mean square error is:  0.9758671906969584


In [207]:
test_pred=model_4.forecast(steps=len(test))
test_rmse=np.sqrt(mean_squared_error(test,test_pred))

In [208]:
print("testing root mean square error is: ",test_rmse)

testing root mean square error is:  0.47220868871368016


In [209]:
input_data=[y4[len(y4)-1]]
input_data_series = pd.Series(input_data, index=[len(y4)])

In [210]:
future_data = pd.concat([y4,input_data_series])
future_model_4 = ARIMA(future_data,order=(5,1,0))
future_model_4=future_model_4.fit()

In [211]:
future_pred_4 = future_model_4.forecast(steps=1)

In [212]:
print("prediction is: ",future_pred_4)

prediction is:  3899    5.188143
dtype: float64


**FINAL PREDICTION**

In [213]:
x= data.drop(columns='water_soil', axis='1')
y= data['water_soil']

In [214]:
x_train,x_test,y_train,y_test= train_test_split(x,y,test_size=0.2,stratify=y)

In [215]:
model=LinearRegression()

In [216]:
model.fit(x_train,y_train)

In [217]:
train_pred=model.predict(x_train)
train_mse=mean_squared_error(y_train,train_pred)
train_r2=r2_score(y_train,train_pred)

In [218]:
print("training mse is: ",train_mse)
print("training r2 is: ",train_r2)

training mse is:  0.11206746533136248
training r2 is:  0.966783928002807


In [219]:
test_pred = model.predict(x_test)
test_mse = mean_squared_error(test_pred,y_test)
test_r2 = r2_score(y_test,test_pred)

In [220]:
print("testing mse is: ", test_mse)
print("testing r2 is: ", test_r2)

testing mse is:  0.10585283566504101
testing r2 is:  0.8778993283897837


In [223]:
input_data=(future_pred_1.iloc[0],future_pred_2.iloc[0],future_pred_3.iloc[0])
input_data_array=np.asarray(input_data)
input_data_reshaped= input_data_array.reshape(1,-1)
prediction_final = model.predict(input_data_reshaped)



In [224]:
print("Prediction is: ",prediction_final)

Prediction is:  [5.94252297]


In [225]:
ligression_mse = mean_squared_error(prediction_final,future_pred_4)
ligression_rmse= np.sqrt(ligression_mse)
ligression_r2 = r2_score(future_pred_4,prediction_final)



In [226]:
print("mse of ligression is: ",ligression_mse)
print("rmse of ligression is: ",ligression_rmse)
print("r2 of ligression is: ",ligression_r2)

mse of ligression is:  0.5690892079868045
rmse of ligression is:  0.7543800156332381
r2 of ligression is:  nan


In [227]:
print("The Next value of Moisture is:",prediction_final)

The Next value of Moisture is: [5.94252297]
