# Selección de proxy 👏🏽👏🏽👏🏽

## Parámetros 🏥

In [None]:
# Folder in which the spreadsheet's `.gsheet` entry is expected to exist
# (GetProxy only checks for the file there; the data itself is read by title).
BASE_DIR = '/content/drive/MyDrive/Colab Notebooks/EF_DataTeamLeader'
# Spreadsheet title, without the `.gsheet` extension.
FILE_NAME = 'EF Data Team Leader'

## Entorno 🚀

In [None]:
!pip install --upgrade -q gspread

# Conectar con GSheets
from gspread import authorize
from google.colab.auth import authenticate_user
from oauth2client.client import GoogleCredentials

# Control de datos
from pathlib import Path
from pandas import DataFrame, ExcelFile

# Modelos
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import RobustScaler

## Código 🧑🏾‍💻

In [None]:
class GetProxy:
    """Select proxy cities for every municipality and publish them to a Google Sheet.

    The pipeline (see ``get_proxy``) reads the ``data_caso`` worksheet, cleans
    the numeric columns, finds each municipality's nearest neighbours on
    robust-scaled features, separates those neighbours into "new" and "old"
    cities and writes the resulting table to another worksheet of the same
    spreadsheet.
    """

    # Columns removed before computing distances when ``set_file`` receives
    # no explicit ``to_drop``.
    _DEFAULT_TO_DROP = ('nueva', 'orders_nov', 'ueats_orders', 'supermarkets', 'pob_internet')

    def __init__(self, base_dir: str, file_name: str) -> None:
        """Store the spreadsheet location and warn if its ``.gsheet`` entry is missing.

        Args:
            base_dir: Folder expected to contain ``<file_name>.gsheet``.
            file_name: Spreadsheet title, without extension.
        """
        # Use the constructor arguments rather than notebook-level globals so
        # the class works for any folder / spreadsheet it is given.
        self.base_dir = Path(base_dir)
        self.file_name = file_name
        self.file_path = self.base_dir.joinpath(self.file_name + '.gsheet')
        # Best-effort warning only: the data is later opened by title, not by path.
        if not self.file_path.is_file():
            print(f'No existe el archivo llamado: {self.file_name} en:\n{self.base_dir}')

    def read_file(self) -> None:
        """Load the ``data_caso`` worksheet into ``self.data`` and collect ``self.new_cities``."""
        gc = authorize(GoogleCredentials.get_application_default())
        read_worksheet = gc.open(self.file_name).worksheet('data_caso')
        rows = read_worksheet.get_all_values()
        df = DataFrame.from_records(rows)
        # The second sheet row holds the column names; the first two rows and
        # the first column are discarded.
        df.columns = df.iloc[1, :]
        self.data = df.drop([0, 1]).reset_index(drop=True).iloc[:, 1:]
        # Pair the first two remaining columns (presumably municipality name and
        # the "is new" flag - verify against the sheet) and keep the names
        # flagged with 'Sí'.
        self.new_cities = [x for x, y in dict(self.data.iloc[:, :2].values).items() if y == 'Sí']

    def set_file(self, id_col: str = 'municipio', to_drop: list = None) -> None:
        """Build the numeric feature table ``self.df`` from ``self.data``.

        Args:
            id_col: Column that becomes the index of the feature table.
            to_drop: Columns to exclude from the features. ``None`` selects
                ``_DEFAULT_TO_DROP``.

        Raises:
            KeyError: If ``id_col`` or any column in ``to_drop`` is missing.
            ValueError: If a remaining value cannot be parsed as a float.
        """
        if to_drop is None:
            to_drop = self._DEFAULT_TO_DROP
        df = self.data.drop(columns=list(to_drop)).set_index(id_col)
        for col in df.columns:
            # Strip percent signs and thousands separators before casting.
            df[col] = df[col].map(lambda x: str(x).replace('%', '').replace(',', '')).astype(float)
        self.df = df

    def get_closest(self, max_dist=1e10, n_closest=17, group_closest: bool = False) -> None:
        """Compute each row's nearest neighbours and store them in ``self.closest``.

        ``self.closest`` holds columns ``dist_01..`` (distances) and
        ``indice_01..`` (neighbour labels). Rows whose nearest neighbour is
        farther than ``max_dist`` are dropped.

        Args:
            max_dist: Maximum distance allowed for a row's closest neighbour;
                also the per-neighbour cut-off used when ``group_closest`` is set.
            n_closest: Number of neighbours requested from the model. The
                first one returned is discarded, so ``n_closest - 1`` remain.
            group_closest: When true, add a ``closest`` column with the
                comma-separated labels of the neighbours within ``max_dist``.
        """
        features = self.df
        scaled = DataFrame(RobustScaler().fit_transform(features),
                           index=features.index,
                           columns=features.columns)

        nbrs = NearestNeighbors(n_neighbors=n_closest, algorithm='ball_tree', n_jobs=-1)
        nbrs.fit(scaled)
        distances, indices = nbrs.kneighbors(scaled)

        dist_cols = ['dist_' + str(x).zfill(2) for x in range(n_closest)]
        indices_cols = ['indice_' + str(x).zfill(2) for x in range(n_closest)]

        # Position 0 is dropped: the model is queried with its own training
        # rows, so that entry is presumably the row itself (verify if the data
        # can contain duplicate rows).
        distances = DataFrame(distances, index=scaled.index, columns=dist_cols).iloc[:, 1:]
        distances = distances[distances['dist_01'] <= max_dist].copy()

        # Translate positional neighbour indices into index labels directly.
        labels = scaled.index.to_numpy()
        indices = DataFrame(labels[indices], index=scaled.index, columns=indices_cols).iloc[:, 1:]

        closest = distances.join(indices)

        if group_closest:
            dist_values = closest[dist_cols[1:]].to_numpy()
            name_values = closest[indices_cols[1:]].to_numpy()
            closest['closest'] = [
                ', '.join(names[dists <= max_dist])
                for dists, names in zip(dist_values, name_values)
            ]

        self.closest = closest

    def split_new_cities(self, max_cities: int = 3) -> None:
        """Split each row's neighbours into new and old cities and store ``self.result``.

        Neighbours are taken from the ``indice_*`` columns of ``self.closest``
        in column order; a neighbour is "new" when it appears in
        ``self.new_cities``. ``self.result`` has the former index as a column
        plus ``old_cities_proxy`` and ``new_cities_proxy`` (comma-separated).

        Args:
            max_cities: Maximum number of cities kept in each group.
        """
        neighbours = self.closest.filter(like='indice_')
        new_set = set(self.new_cities)  # O(1) membership inside the loop
        new_ones = []
        old_ones = []
        for cities in neighbours.itertuples(index=False, name=None):
            aux_new = [city for city in cities if city in new_set]
            aux_old = [city for city in cities if city not in new_set]
            new_ones.append(', '.join(aux_new[:max_cities]))
            old_ones.append(', '.join(aux_old[:max_cities]))
        self.cols = ['old_cities_proxy', 'new_cities_proxy']
        result = DataFrame({'old_cities_proxy': old_ones, 'new_cities_proxy': new_ones},
                           index=neighbours.index)
        self.result = result[self.cols].reset_index()

    def split_proxies(self) -> None:
        """Expand each comma-separated proxy column into numbered columns.

        For every column in ``self.cols`` the columns ``<col>_1``, ``<col>_2``,
        ... are appended to ``self.result``; rows with fewer cities get missing
        values in the trailing columns.
        """
        for col in self.cols:
            aux = self.result[col].str.split(', ', expand=True)
            aux.columns = [f'{col}_{i}' for i in range(1, aux.shape[1] + 1)]
            self.result = self.result.join(aux)

    def edit_gsheets(self, sheet: str = 'PROXY') -> None:
        """Write ``self.result`` (header row plus values) to a worksheet.

        Args:
            sheet: Title of the worksheet to update in the spreadsheet.
        """
        gc = authorize(GoogleCredentials.get_application_default())
        edit_worksheet = gc.open(self.file_name).worksheet(sheet)
        # Rows with fewer proxies than others carry missing values after
        # ``split_proxies``; upload them as empty strings so the payload holds
        # no null/NaN cells.
        payload = self.result.fillna('')
        edit_worksheet.update([payload.columns.values.tolist()] + payload.values.tolist())
        print(f'\nEl archivo "{self.file_name}" pestaña "{sheet}", fue actualizado exitosamente en:\n"{self.base_dir}"\n')

    def get_proxy(self) -> DataFrame:
        """Run the whole pipeline with default settings and return ``self.result``."""
        self.read_file()
        self.set_file()
        self.get_closest()
        self.split_new_cities()
        self.split_proxies()
        self.edit_gsheets()
        return self.result


## Autenticar con Google 🌎

In [None]:
# Authenticate with Google through the Colab helper; GetProxy later requests
# application-default credentials to open the spreadsheet.
authenticate_user()

## Donde ocurre la magia 🪄

In [None]:
# Run the full pipeline: read the sheet, compute neighbours, split them into
# new/old proxies and write the result back to the spreadsheet.
sp = GetProxy(BASE_DIR, FILE_NAME)
df = sp.get_proxy()
# Preview the first rows of the resulting table.
df.head()


El archivo "EF Data Team Leader" pestaña "PROXY", fue actualizado exitosamente en:
"/content/drive/MyDrive/Colab Notebooks/EF_DataTeamLeader"



Unnamed: 0,municipio,old_cities_proxy,new_cities_proxy,old_cities_proxy_1,old_cities_proxy_2,old_cities_proxy_3,new_cities_proxy_1,new_cities_proxy_2,new_cities_proxy_3
0,Tijuana,"León, Mérida, Querétaro","Culiacán, Mexicali, Hermosillo",León,Mérida,Querétaro,Culiacán,Mexicali,Hermosillo
1,Juárez,"León, Toluca, Chihuahua","Hermosillo, Culiacán, Morelia",León,Toluca,Chihuahua,Hermosillo,Culiacán,Morelia
2,Hermosillo,"Chihuahua, Toluca, León","Culiacán, Mexicali, Juárez",Chihuahua,Toluca,León,Culiacán,Mexicali,Juárez
3,Mexicali,"Cancún, Mérida, Chihuahua","Culiacán, Cuernavaca, Hermosillo",Cancún,Mérida,Chihuahua,Culiacán,Cuernavaca,Hermosillo
4,Morelia,"Chihuahua, Cancún, León","Hermosillo, Culiacán, Mexicali",Chihuahua,Cancún,León,Hermosillo,Culiacán,Mexicali
