In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import NearestNeighbors

In [3]:
def handle_bad_line(line):
    print(f"Bad line: {line}")
    return None

def load_data_with_custom_handling(file_path):
    df = pd.read_csv(file_path, delimiter=',', on_bad_lines=handle_bad_line)
    return df

df = load_data_with_custom_handling('../data/data.csv')
df.head()

ValueError: on_bad_line can only be a callable function if engine='python' or 'pyarrow'

In [None]:
def preprocess_data(df):
    le = LabelEncoder()
    for column in ['store_location', 'product_category', 'product_type', 'product_detail']:
        df[column] = le.fit_transform(df[column])
    return df

df = preprocess_data(df)
df.head()

In [None]:
def train_model(df):
    features = df[['store_location', 'product_category', 'product_type', 'unit_price']]
    model = NearestNeighbors(n_neighbors=10, algorithm='ball_tree')
    model.fit(features)
    return model

model = train_model(df)

In [None]:
def get_recommendations(model, data, new_data_point):
    distances, indices = model.kneighbors([new_data_point])
    recommended_indices = indices.flatten()
    recommended_data = data.iloc[recommended_indices]
    return recommended_data.drop_duplicates(subset=['product_detail'])

# Example of getting recommendations
new_data_point = [0, 1, 2, 3]  # Replace with actual encoded values
recommendations = get_recommendations(model, df, new_data_point)
recommendations