# Librerías

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn import preprocessing

## Carga de Datos

In [None]:
# Load the CSV holding the soil attributes and the yield column ("Rendimiento").
# The path is relative, so it is resolved against the notebook's working directory.
data = pd.read_csv("soil_Rendimiento_PCA3.csv")

In [None]:
# Preview the first rows (head() defaults to 5) to sanity-check the columns.
data.head()

Unnamed: 0,Area,N,P205,K2SO4,CaO,MgO,Zn,Qfe,Rendimiento
0,0.87,171,47,157,20,31.0,8.0,2.0,12067.0
1,0.87,175,47,177,20,40.0,8.0,2.0,18219.0
2,0.87,197,54,181,99,35.5,1.2,2.0,14090.0
3,0.87,197,54,171,99,38.0,1.2,2.0,10320.0
4,0.87,188,54,171,99,38.0,1.2,0.2,9340.0


## Especificación de Atributos y Objetivo

In [None]:
# Feature matrix: every column of the dataset except the yield target.
Xs = data.drop(columns=['Rendimiento'])

# Target as an (n, 1) column vector. Fitting against a 2-D target makes
# reg.coef_ 2-D and reg.intercept_ 1-D, which the later cells rely on when
# they index reg.coef_[0][i] and reg.intercept_[0].
y = data[['Rendimiento']].values

# Ordinary linear regression of the target on all attributes.
reg = LinearRegression()
reg.fit(Xs, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

## Interceptos y Coeficientes

In [None]:
# Report the fitted intercept and coefficient arrays under a "Rendimiento" heading.
# print() joins its arguments with a single space, so each array starts one
# space after the line break embedded in its label.
print("Rendimiento")
for _parts in (
    ("Interceptos: \n", reg.intercept_, "\n"),
    ("Coeficiente: \n", reg.coef_),
):
    print(*_parts)
print("\n")

Rendimiento
Interceptos: 
 [171395.84181367] 

Coeficiente: 
 [[ 15772.98944354   -496.046875      -70.18791534    897.91651786
   -2033.05074799    -78.73392857 -25762.34624975   4647.92215278]]




## Modelo de Regresión Lineal

In [None]:
# Print the fitted equation with every number shown to 5 significant digits.
# The first placeholder takes the intercept; the remaining eight consume the
# coefficients in order, unpacked from the single row of reg.coef_.
print(
    "El Modelo Resultante:\nRendimiento = {:.5} + {:.5}*Area + {:.5}*N + {:.5}*P205 + {:.5}*K2SO4 + {:.5}*CaO + {:.5}*MgO + {:.5}*Zn +{:.5}*Qfe".format(
        reg.intercept_[0], *reg.coef_[0]
    )
)

El Modelo Resultante:
Rendimiento = 1.714e+05 + 1.5773e+04*Area + -496.05*N + -70.188*P205 + 897.92*K2SO4 + -2033.1*CaO + -78.734*MgO + -2.5762e+04*Zn +4647.9*Qfe


## Precisión del Modelo

In [None]:
# R^2 (coefficient of determination): the proportion of the variance in the
# dependent variable that is predictable from the independent variables.
# NOTE: the score is computed on the same rows the model was fitted on, so it
# is an in-sample figure, not an estimate of out-of-sample performance.
percent = reg.score(Xs, y)
print("Precicion Modelo: ",percent,"\n")

# Scale to a percentage first and round last: rounding the fraction and then
# multiplying by 100 can reintroduce binary floating-point noise
# (e.g. round(0.57, 5) * 100 == 56.99999999999999).
print("Del total de Kg de paltas",round(percent * 100, 3),"% aproximadamente son influidas por las variables en la data")

Precicion Modelo:  0.8070858662144422 

Del total de Kg de paltas 80.709 % aproximadamente son influidas por las variables en la data


## Testeo

In [None]:
def Rendimiento(Area,N,P205,K2SO4,CaO,MgO,Zn,Qfe):
    """Evaluate the fitted linear model by hand for one set of attribute values.

    Reads the module-level ``reg`` object and computes
    ``reg.intercept_[0] + sum(reg.coef_[0][i] * value_i)``, pairing the
    coefficients with the arguments in signature order
    (Area, N, P205, K2SO4, CaO, MgO, Zn, Qfe).

    Returns:
        The predicted value, not rounded.
    """
    inputs = (Area, N, P205, K2SO4, CaO, MgO, Zn, Qfe)
    # Accumulate left to right, starting from the intercept, so the
    # floating-point summation order matches the written-out formula.
    prediction = reg.intercept_[0]
    for position, value in enumerate(inputs):
        prediction = prediction + reg.coef_[0][position] * value
    return prediction

In [None]:
# Spot-check the hand-written predictor on one parcel, rounded to two decimals.
round(
    Rendimiento(Area=0.28, N=188, P205=54, K2SO4=171, CaO=99, MgO=38, Zn=1.2, Qfe=0.2),
    2,
)

-1940.1

## Resultados Test OLS (Ordinary Least Squares)

In [None]:
# Plain ndarray of the eight attribute columns, in model order. No column
# names are attached, so statsmodels labels these regressors x1..x8.
X = np.column_stack([
    data[column]
    for column in ('Area', 'N', 'P205', 'K2SO4', 'CaO', 'MgO', 'Zn', 'Qfe')
])

# Target as a pandas Series. Note that this rebinds the name `y` that the
# sklearn cells above used for the (n, 1) array.
y = data['Rendimiento']

In [None]:
# Design matrix with a leading column of ones (the intercept term) for statsmodels.
X2 = sm.add_constant(X, prepend=True)

In [None]:
# Fit on X2 (X plus the constant column) rather than on X. sm.OLS does not add
# an intercept on its own, so fitting on X forces the regression through the
# origin and makes statsmodels report the uncentered R-squared. Using X2 gives
# the same model form as the sklearn LinearRegression (fit_intercept=True)
# fitted earlier in the notebook.
# NOTE: any stored outputs of the cells that display `Results` were produced
# without the constant and must be re-run after this change.
Model = sm.OLS(y, X2)
Results = Model.fit()

In [None]:
# Estimated coefficients, one per column of the design matrix given to sm.OLS.
Results.params

x1    15841.092161
x2    -1716.605192
x3     2000.323450
x4     1296.113023
x5      -24.845602
x6     -209.281498
x7    -4115.155416
x8     9540.220670
dtype: float64

In [None]:
# t-statistics of the estimated coefficients, in the same order as Results.params.
Results.tvalues

x1    26.794181
x2   -14.265493
x3    12.356852
x4    16.396404
x5    -0.315112
x6    -1.096971
x7    -5.130283
x8    11.915337
dtype: float64

In [None]:
# Full statsmodels regression report (fit statistics and coefficient table),
# followed by blank lines as a visual separator.
print(Results.summary())
print("\n")

                                 OLS Regression Results                                
Dep. Variable:            Rendimiento   R-squared (uncentered):                   0.947
Model:                            OLS   Adj. R-squared (uncentered):              0.946
Method:                 Least Squares   F-statistic:                              697.5
Date:                Mon, 16 Dec 2019   Prob (F-statistic):                   4.76e-194
Time:                        18:00:43   Log-Likelihood:                         -3273.0
No. Observations:                 320   AIC:                                      6562.
Df Residuals:                     312   BIC:                                      6592.
Df Model:                           8                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [None]:
# Arr = []
# for x in range(0,40):
#     Arr.append(Ambos(x,0.84,148.77,40.89,136.59,17.4,26.97,6.96,1.74,3.48))
#     print("Parcela ",x+1,":\t",round(Ambos(x,0.84,148.77,40.89,136.59,17.4,26.97,6.96,1.74,3.48),3),"Kg")