In [None]:
import pandas as pd
import numpy as np
import re
import os
import pystan
import geopandas
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score, mean_absolute_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt

In [None]:
carpeta_path = r'C:\Users\William\Desktop\el algoritmo es correcto'
archivo_train = r'train_data.csv'
path_train = os.path.join(carpeta_path, archivo_train)

In [None]:
train = pd.read_csv(
    path_train, 
    dtype={
        "tiempodeconstruido": str,
        "vista": str,
        "tipoinmueble": str,
        "tiponegocio": str,
        'valorventa': 'int64',
        # 'area': 'int64'
    }
)
train['data'] = 'train'

In [None]:
carpeta_path = r'C:\Users\William\Desktop\el algoritmo es correcto'
archivo_test = r'test_data.csv'
path_test = os.path.join(carpeta_path, archivo_test)

In [None]:
test = pd.read_csv(
    path_test, 
    dtype={
        "tiempodeconstruido": str,
        "vista": str,
        "tipoinmueble": str,
        "tiponegocio": str,
        'valorventa': 'int64',
        # 'area': 'int64'
    }
)
test['data'] = 'test'

In [None]:
prueba = pd.concat([train, test], ignore_index=True)

In [None]:
prueba = prueba[
    (prueba['porteria'].isna()) &
    (prueba['conjuntocerrado']==0) &
    (prueba['tipoinmueble']=='Casa') &
    (prueba['ascensor'].isna()) &
    ((prueba['gimnasio'].isna()) | (prueba['gimnasio']==0)) &
    (prueba['piscina'].isna()) &
    ((prueba['area']<1500) & (prueba['area']>35)) &
    ((prueba['piso']<5) | (prueba['piso'].isna())) &
    ((prueba['habitaciones']<15) & (prueba['habitaciones']>0))
]

In [None]:
columnas_no_usar = ['ascensor', 'tipoinmueble', 'porteria', 'conjuntocerrado', 'gimnasio', 'piscina', 'valoradministracion','tiponegocio']
prueba = prueba.drop(columns = columnas_no_usar)

In [None]:
condlist = [
    (prueba['banos']<=1) | (prueba['banos'].isna()), 
    (prueba['banos']==2), 
    (prueba['banos'].isin([3,4,5])), 
    (prueba['banos'].isin([6,7,8,9])),
    (prueba['banos']>=10)]
choicelist = ['1', '2', '3-5', '6-9', '10+']
prueba['banos modificado'] = np.select(condlist=condlist, choicelist=choicelist)

condlist = [
    (prueba['garajes'].isna()),
    (prueba['garajes']<=1), 
    (prueba['garajes']>1) & (prueba['garajes']<=3), 
    (prueba['garajes']>3)]
choicelist = ['N.N', '0-1', '2-3', '4+']
prueba['garajes modificado3'] = np.select(condlist=condlist, choicelist=choicelist)

condlist = [(prueba['piso'].isna()), (prueba['piso']>0)]
choicelist = ['No define', prueba['piso'].astype(str)]
prueba['piso modificado3'] = np.select(condlist=condlist, choicelist=choicelist)

condlist = [(prueba['vista'].isna()), (prueba['vista'].notna())]
choicelist = ['No define', prueba['vista']]
prueba['vista modificado'] = np.select(condlist=condlist, choicelist=choicelist)

condlist = [(prueba['vista'].isna()), (prueba['vista'].notna())]
choicelist = ['Exterior', prueba['vista']]
prueba['vista modificado2'] = np.select(condlist=condlist, choicelist=choicelist) 

condlist = [(prueba['zonalavanderia'].isna()), (prueba['zonalavanderia'].notna())]
choicelist = [0, 1]
prueba['zonalavanderia modificado'] = np.select(condlist=condlist, choicelist=choicelist)

condlist = [(prueba['vigilancia'].isna()) | (prueba['vigilancia']==0), (prueba['vigilancia']==1)]
choicelist = ['No define-0', prueba['vigilancia'].astype(str)]
prueba['vigilancia modificado'] = np.select(condlist=condlist, choicelist=choicelist)

condlist = [(prueba['deposito'].isna()), (prueba['deposito'].notna())]
choicelist = [0, prueba['deposito']]
prueba['deposito modificado'] = np.select(condlist=condlist, choicelist=choicelist)

prueba['tiempodeconstruido modificado'] = np.where(
    prueba['tiempodeconstruido'].isna(),
    'No define',
    prueba['tiempodeconstruido']
)

columnas_valor0 = [
    'balcon', 'banoservicio', 'cuartoservicio', 'estudio', 'halldealcobas', 
    'remodelado', 'parqueaderovisitantes', 'saloncomunal','terraza']
prueba[columnas_valor0] = prueba[columnas_valor0].fillna(0)



In [None]:
prueba['y'] = (prueba['valorventa']/prueba['area']) 

In [None]:
prueba = prueba[
    (prueba['y']<prueba['y'].quantile(0.95).astype('int64')) &
    (prueba['y']>prueba['y'].quantile(0.05).astype('int64'))]

In [None]:
localidades_path = r'C:\Users\William\Desktop\el algoritmo es correcto\Datos abiertos bogota\localidades.json'
localidades = geopandas.read_file(localidades_path, crs={'init':'epsg:4686'})

In [None]:
localidades = localidades[['LocNombre', 'geometry']]

In [None]:
estratificacion_path = r'C:\Users\William\Desktop\el algoritmo es correcto\Datos abiertos bogota\manzana_estratificacion.json'
estratificacion = geopandas.read_file(estratificacion_path, crs={'init':'epsg:4686'})

In [None]:
estratificacion = estratificacion[['ESTRATO', 'geometry']]

In [None]:
avaluo_path = r'C:\Users\William\Desktop\el algoritmo es correcto\Datos abiertos bogota\avaluo'
avaluo = geopandas.read_file(avaluo_path, crs={'init':'epsg:4686'})

In [None]:
avaluo = avaluo[["GRUPOP_TER", "geometry"]] # MANZANA_ID	CP_TERR_AR

In [None]:
casas = geopandas.GeoDataFrame(
    prueba, geometry=geopandas.points_from_xy(prueba.longitud, prueba.latitud), crs={'init': 'epsg:4326'})

In [None]:
interseccion1 = geopandas.sjoin(left_df=casas, right_df=localidades, how='left', op='intersects').drop(columns=['index_right'])
interseccion2 = geopandas.sjoin(left_df=interseccion1, right_df=estratificacion, how='left', op='intersects').drop(columns=['index_right'])
interseccion3 = geopandas.sjoin(left_df=interseccion2, right_df=avaluo, how='left', op='intersects').drop(columns=['index_right'])

In [None]:
interseccion3

In [None]:
interseccionn = interseccion3

In [None]:
interseccionn['ESTRATO'] = np.where(
    interseccionn['ESTRATO'].isna() | interseccionn['ESTRATO']==0,
    np.NaN,
    interseccionn['ESTRATO']
)

In [None]:
train = interseccionn.loc[interseccionn['ESTRATO'].notna(),['ESTRATO', 'latitud', 'longitud']]
X = train[['latitud', 'longitud']].values
y = train[['ESTRATO']].values
# Create training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=42)
neigh = KNeighborsClassifier(n_neighbors=3, metric = 'euclidean')
neigh.fit(X_train, y_train.ravel())
y_pred = neigh.predict(X_test)
print("R^2: {}".format(neigh.score(X_test, y_test)))
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error: {}".format(rmse))
print("Accuracy: {}".format(accuracy_score(y_test, y_pred)))
valores = interseccionn.loc[interseccionn['ESTRATO'].isnull(),['latitud', 'longitud']].values
interseccionn.loc[interseccionn['ESTRATO'].isnull(),'ESTRATO'] = neigh.predict(valores)

In [None]:
indices = {}
for i, grupo in enumerate(interseccionn[interseccionn['GRUPOP_TER'].notna()]['GRUPOP_TER'].unique()):
    indices[i] = grupo
    interseccionn.loc[interseccionn['GRUPOP_TER']==grupo, ['GRUPOP_TER']] = i
train = interseccionn.loc[interseccionn['GRUPOP_TER'].notna(),['GRUPOP_TER', 'latitud', 'longitud']]
X = train[['latitud', 'longitud']].values
y = train[['GRUPOP_TER']].astype(int).values
# Create training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=42)
neigh = KNeighborsClassifier()#algorithm='brute', n_neighbors=5,  leaf_size=1, p=2, weights='distance', metric='chebyshev', n_jobs=-1)
neigh.fit(X_train, y_train)
y_pred = neigh.predict(X_test)
print("R^2: {}".format(neigh.score(X_test, y_test)))
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error: {}".format(rmse))
print("Accuracy: {}".format(accuracy_score(y_test, y_pred)))
valores = interseccionn.loc[interseccionn['GRUPOP_TER'].isnull(),['latitud', 'longitud']].values
interseccionn.loc[interseccionn['GRUPOP_TER'].isnull(),'GRUPOP_TER'] = neigh.predict(valores)
for i, grupo in indices.items():
    interseccionn.loc[interseccionn['GRUPOP_TER']==i, ['GRUPOP_TER']] = grupo

In [None]:
interseccionn = interseccionn.drop(
    columns=[
        'banos', 'deposito', 'estrato', 'garajes', 'piso', 
        'tiempodeconstruido', 'vigilancia', 'vista',
        'zonalavanderia', 'latitud', 'longitud', 
        'vista modificado2', 'geometry'
    ])

In [None]:
interseccionn.columns

In [None]:
interseccionn = interseccionn.rename(columns={'banos modificado':'banos_modificado', 'garajes modificado3': 'garajes_modificado3', 'piso_modificado3': 'piso modificado3', 'vista modificado': 'vista_modificado', 'zonalavanderia modificado': 'zonalavanderia_modificado', 'vigilancia modificado': 'vigilancia_modificado', 'deposito modificado': 'deposito_modificado', 'tiempodeconstruido modificado': 'tiempodeconstruido_modificado'})

In [None]:
import pybrms

In [None]:
model = pybrms.fit_model(
    "y ~ balcon + banoservicio + cuartoservicio + estudio + habitaciones + halldealcobas + parqueaderovisitantes + remodelado + saloncomunal + terraza + valorventa + banos_modificado + garajes_modificado3 + piso_modificado3 + vista_modificado + zonalavanderia_modificado + vigilancia_modificado + deposito_modificado + tiempodeconstruido_modificado + (1 + area + valorventa|LocNombre/GRUPOP_TER/ESTRATO)",
    data = interseccionn.values,
    family = "Gamma",
    priors = [("Gamma(1,1)", " b "), ("Gamma(1,1)", "sd")],
    # chains = 1,
    # iter = 5000
    )

In [None]:
import sys 
sys.path.append(r'C:\Users\William\Desktop')
from pybrms2 import fit

In [None]:
import pystan

In [None]:
import rpy2.robjects.packages as rpackages
from rpy2.robjects import StrVector
utils = rpackages.importr("utils")
utils.install_packages(StrVector(('brms',)))
brms = rpackages.importr("brms")

In [None]:
from pybrms import pybrms

In [None]:
import rpy2.robjects.packages as rpackages
from rpy2.robjects import StrVector
utils = rpackages.importr("utils")
utils.install_packages(StrVector(('brms',)))
brms = rpackages.importr("brms")

In [None]:
dir(brms)

In [None]:
import rpy2.robjects.packages as rpackages
brms = rpackages.importr("brms")
# from pybrms import pybrms
epilepsy = brms.get_brms_data("epilepsy")
model = brms.fit(
formula = "count ~ zAge + zBase x Trt + (1 | patient)",
data = epilepsy,
family = "poisson"
)

In [1]:
import statsmodels.api as sm

In [2]:
data = sm.datasets.scotland.load(as_pandas=False)
data.exog = sm.add_constant(data.exog)

In [3]:
import numpy as np

In [None]:
data.exog

In [None]:
np.ones((len(data.endog),1))

In [None]:
help(sm.families.Gamma())

In [None]:
gamma_model = sm.GLM(data.endog, np.ones((len(data.endog),1)), family=sm.families.Gamma(link=sm.families.links.log))

In [None]:
resultado = gamma_model.fit()

In [None]:
print(resultado.summary())

In [14]:
import pandas as pd
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri

from rpy2.robjects.conversion import localconverter
pd_df = pd.DataFrame({'int_values': [1,2,3],
                      'str_values': ['abc', 'def', 'ghi']})

pd_df

Unnamed: 0,int_values,str_values
0,1,abc
1,2,def
2,3,ghi


In [17]:
with localconverter(ro.default_converter + pandas2ri.converter):
  r_from_pd_df = ro.conversion.py2rpy(pd.DataFrame(data.endog, columns=["y"]))

r_from_pd_df

y
...


In [None]:
resultado.predict(1)

In [4]:
import rpy2.robjects.packages as rpackages
gamlss = rpackages.importr("gamlss")

In [5]:
import rpy2.robjects.numpy2ri
rpy2.robjects.numpy2ri.activate()

In [13]:
import pandas as pd
from rpy2.robjects import pandas2ri
import rpy2.robjects as ro
r_from_pd_df = ro.conversion.py2rpy()

NotImplementedError: Conversion 'py2rpy' not defined for objects of type '<class 'pandas.core.frame.DataFrame'>'

In [19]:
f = gamlss.gamlss(formula = rpackages.reval("y~1"), data=r_from_pd_df,family = "GA")

GAMLSS-RS iteration 1: Global Deviance = 222.6459 
GAMLSS-RS iteration 2: Global Deviance = 222.6459 


In [80]:
f[0]

0,1
'NO','Normal'


In [None]:
x = gamlss.histDist(data.exog, family = "GA")

In [None]:
x

In [None]:
help(gamlss)

In [None]:
from pygam import GammaGAM

In [None]:
help(GammaGAM)

In [None]:
data.endog

In [None]:
gamm=GammaGAM().fit(np.ones((len(data.exog),1)),data.endog)

In [None]:
gamm.summary()

In [None]:
gamm._estimate_model_statistics

In [None]:
dir(gamm)