<a href="https://colab.research.google.com/github/RicAntonio/Datos_de_ventas_de_productos_agr-colas_-2022-2023-/blob/main/Agricultural_Products_Sales_Data_(2022_2023).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Datos de ventas de productos agrícolas (2022-2023)

In [None]:
import pandas as pd
from google.colab import drive

# Mount Google Drive at /content/drive; force_remount=True asks Colab for a
# fresh mount even when Drive is already mounted from an earlier cell.
drive.mount('/content/drive', force_remount=True)

# Drive paths used by this cell: the raw sales CSV passed to load_data and the
# feature CSV written by save_features.
input_filepath = '/content/drive/My Drive/Agricultural_sales_2022-2023.csv'
output_filepath = '/content/drive/My Drive/agricultural_features.csv'

# 1. Cargar datos desde el CSV de Google Drive
def load_data(filepath):
    """Read the CSV file at ``filepath`` into a DataFrame.

    Args:
        filepath: Location of the CSV file; handed unchanged to
            ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    raw_frame = pd.read_csv(filepath)
    return raw_frame

# 2. Realizar ingeniería de características
def engineer_features(df):
    """Build the model feature table from the raw sales frame.

    Adds a ``sales_growth`` column (``units_sold_kg`` divided by the previous
    row's ``units_on_hand_kg``), one-hot encodes ``product_id`` and
    ``category``, and fills every remaining NaN with 0.

    The input frame is left untouched; a new frame is returned.

    Args:
        df: DataFrame containing at least the columns ``units_sold_kg``,
            ``units_on_hand_kg``, ``product_id`` and ``category``.

    Returns:
        A new DataFrame with ``sales_growth`` added, the two categorical
        columns replaced by their dummy columns, and no NaN values.

    Raises:
        KeyError: if any of the required columns is missing.
    """
    # NOTE(review): shift(1) takes the previous row in file order, regardless
    # of product; confirm the rows are sorted the way this ratio assumes.
    previous_on_hand = df['units_on_hand_kg'].shift(1)
    ratio = df['units_sold_kg'] / previous_on_hand

    # A zero on-hand value in the previous row makes the ratio +/-inf, which
    # fillna(0) below would not touch; turn it into NaN so it is zero-filled
    # like the undefined first row instead of being written to the CSV as inf.
    ratio = ratio.replace([float('inf'), float('-inf')], float('nan'))

    # assign() returns a new frame, so the caller's DataFrame is not modified.
    enriched = df.assign(sales_growth=ratio)
    encoded = pd.get_dummies(enriched, columns=['product_id', 'category'])
    return encoded.fillna(0)

# 3. Guardar las características en Google Drive
def save_features(features, output_filepath):
    """Write ``features`` to ``output_filepath`` as CSV, omitting the index.

    Args:
        features: DataFrame to persist.
        output_filepath: Destination path of the CSV file.
    """
    features.to_csv(path_or_buf=output_filepath, index=False)

# Pipeline: read the raw CSV, derive the feature table, and write it to Drive.
# `df` and `features` remain defined at notebook scope after this cell runs.
df = load_data(input_filepath)
features = engineer_features(df)
save_features(features, output_filepath)

# Report where the feature file was written.
print(f"Características guardadas en: {output_filepath}")


Mounted at /content/drive
Características guardadas en: /content/drive/My Drive/agricultural_features.csv


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import pickle
from google.colab import drive
import numpy as np

# Mount Google Drive at /content/drive; force_remount=True asks Colab for a
# fresh mount even when Drive is already mounted from an earlier cell.
drive.mount('/content/drive', force_remount=True)

# Drive paths used by this cell: the feature CSV passed to load_features and
# the pickle file written by save_model.
features_filepath = '/content/drive/My Drive/agricultural_features.csv'
model_output_filepath = '/content/drive/My Drive/agricultural_sales_model.pkl'

# 1. Cargar las características desde Google Drive
def load_features(filepath):
    """Read the feature CSV at ``filepath`` into a DataFrame.

    Args:
        filepath: Location of the CSV file; handed unchanged to
            ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    feature_table = pd.read_csv(filepath)
    return feature_table

# 2. Preprocesar datos: convertir categóricas a variables numéricas (one-hot encoding)
#    y manejar valores no finitos (NaN, Inf)
def preprocess_features(df):
    """One-hot encode leftover categoricals and remove non-finite values.

    Steps:
      1. ``pd.get_dummies(..., drop_first=True)`` encodes the columns pandas
         treats as categorical, dropping the first level of each.
      2. +/-inf values are converted to NaN.
      3. NaNs are filled with the median of their column.
      4. Any NaN still left (a column with no finite value has a NaN median)
         is filled with 0 so the result is fully finite.

    The input frame is not modified; a new frame is returned.

    Args:
        df: Feature DataFrame, possibly containing text columns, NaN and inf.

    Returns:
        A new DataFrame containing no NaN or infinite values.
    """
    encoded = pd.get_dummies(df, drop_first=True)

    # Treat infinities as missing so they take part in the median fill.
    finite_only = encoded.replace([np.inf, -np.inf], np.nan)

    # Median fill first; the trailing fillna(0) covers columns whose median is
    # itself NaN because they hold no finite value at all.
    column_medians = finite_only.median()
    return finite_only.fillna(column_medians).fillna(0)

# 3. Entrenar el modelo
def train_model(features, random_state=42):
    """Fit a random-forest regressor that predicts ``units_sold_kg``.

    Args:
        features: DataFrame holding the target column ``units_sold_kg`` plus
            the predictor columns (expected to be numeric already).
        random_state: Seed used for both the train/test split and the forest,
            so repeated runs on the same data give the same model. The
            default keeps the split identical to the previous hard-coded 42.

    Returns:
        The fitted ``RandomForestRegressor``.

    Raises:
        KeyError: if ``features`` has no ``units_sold_kg`` column.
    """
    target_column = 'units_sold_kg'

    # NOTE(review): if the table still contains `sales_growth` as built by the
    # feature-engineering cell, that column is computed from `units_sold_kg`
    # itself and leaks the target into the predictors; confirm whether it
    # should be excluded here.
    X = features.drop(columns=target_column)
    y = features[target_column]

    # Train on 80% of the rows. The held-out 20% is split off but not scored
    # here, so callers currently get no accuracy estimate from this function.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_state
    )

    # Seed the forest as well; without it every run yields a different model
    # even though the split is fixed.
    model = RandomForestRegressor(random_state=random_state)
    model.fit(X_train, y_train)

    return model

# 4. Guardar el modelo en Google Drive
def save_model(model, output_filepath):
    """Serialize ``model`` with pickle into the file at ``output_filepath``.

    Args:
        model: Object to pickle.
        output_filepath: Destination path; the file is opened in binary
            write mode, replacing any existing content.
    """
    with open(output_filepath, mode='wb') as sink:
        pickle.dump(model, sink)

# Pipeline: load the feature CSV, preprocess it, fit the model, and pickle it
# to Drive. `features` is rebound to the preprocessed frame, and both
# `features` and `model` remain defined at notebook scope afterwards.
features = load_features(features_filepath)
features = preprocess_features(features)  # one-hot encode remaining categoricals and handle NaN/Inf
model = train_model(features)
save_model(model, model_output_filepath)

# Report where the model file was written.
print(f"Modelo guardado en: {model_output_filepath}")


Mounted at /content/drive
Modelo guardado en: /content/drive/My Drive/agricultural_sales_model.pkl


In [None]:
import pandas as pd
import numpy as np
import pickle
from google.colab import drive

# Mount Google Drive at /content/drive; force_remount=True asks Colab for a
# fresh mount even when Drive is already mounted from an earlier cell.
drive.mount('/content/drive', force_remount=True)

# Drive paths used by this cell: the pickled model passed to load_model, the
# feature CSV passed to load_features, and the CSV written by save_predictions.
model_filepath = '/content/drive/My Drive/agricultural_sales_model.pkl'
features_filepath = '/content/drive/My Drive/agricultural_features.csv'
predictions_output_filepath = '/content/drive/My Drive/agricultural_sales_predictions.csv'

# 1. Cargar el modelo desde Google Drive
def load_model(filepath):
    """Unpickle and return the object stored in the file at ``filepath``.

    Args:
        filepath: Path of the pickle file, opened in binary read mode.

    Returns:
        Whatever object ``pickle.load`` reconstructs from the file.
    """
    # NOTE: unpickling can execute code embedded in the file, so only point
    # this at pickle files you created or otherwise trust.
    with open(filepath, mode='rb') as source:
        loaded = pickle.load(source)
    return loaded

# 2. Cargar las características más recientes desde Google Drive
def load_features(filepath):
    """Read the feature CSV at ``filepath`` into a DataFrame.

    Args:
        filepath: Location of the CSV file; handed unchanged to
            ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    loaded_features = pd.read_csv(filepath)
    return loaded_features

# 3. Generar predicciones
def generate_predictions(model, features):
    """Predict with ``model`` after shaping ``features`` to what it expects.

    The frame is one-hot encoded, aligned to ``model.feature_names_in_``
    (extra columns dropped, missing ones added as 0, order matched), and
    cleaned of NaN / infinite values (both replaced by 0) before calling
    ``model.predict``.

    Args:
        model: Fitted estimator exposing ``feature_names_in_`` and
            ``predict``.
        features: DataFrame of raw feature columns; it may contain text
            columns and columns the model does not use.

    Returns:
        Whatever ``model.predict`` returns for the prepared frame.

    Raises:
        AttributeError: if ``model`` has no ``feature_names_in_``.
    """
    # Columns, in order, that the model recorded when it was fitted.
    expected_features = list(model.feature_names_in_)

    # Training one-hot encodes leftover text columns, so the same encoding is
    # needed here; otherwise every dummy column learned from them is absent
    # and gets silently zero-filled. drop_first is not used on purpose: the
    # level dropped at training time is simply not in expected_features and
    # is discarded by the reindex below, whatever levels this frame contains.
    encoded = pd.get_dummies(features)

    # Keep only the expected columns in the expected order; columns the model
    # knows but this frame lacks are created and filled with 0.
    aligned = encoded.reindex(columns=expected_features, fill_value=0)

    # NOTE(review): training fills missing values with column medians while
    # this path uses 0; confirm that difference is acceptable.
    cleaned = aligned.replace([np.inf, -np.inf], np.nan).fillna(0)

    return model.predict(cleaned)

# 4. Guardar las predicciones en Google Drive
def save_predictions(predictions, output_filepath):
    """Write ``predictions`` to a one-column CSV named ``predicted_sales``.

    Args:
        predictions: Sequence or array of predicted values.
        output_filepath: Destination path of the CSV file; the index is not
            written.
    """
    prediction_frame = pd.DataFrame(data=predictions, columns=['predicted_sales'])
    prediction_frame.to_csv(output_filepath, index=False)

# Pipeline: load the pickled model and the feature CSV, predict, and write the
# predictions to Drive.
# NOTE(review): features_filepath points at the same file the training cell
# reads, so these are predictions on the training data; confirm that is the
# intent.
model = load_model(model_filepath)
features = load_features(features_filepath)
predictions = generate_predictions(model, features)
save_predictions(predictions, predictions_output_filepath)

# Report where the predictions file was written.
print(f"Predicciones guardadas en: {predictions_output_filepath}")


Mounted at /content/drive
Predicciones guardadas en: /content/drive/My Drive/agricultural_sales_predictions.csv
