In [1]:
import numpy as np
import pandas as pd
from itertools import product
# Synthetic data set sizes. Every array below is sized from these two
# constants so the consumer, shop and consumer-shop tables cannot drift apart.
N_CONSUMERS = 100
N_SHOPS = 10

# Fixed seed for reproducibility. The np.random calls below are evaluated in
# source order; reordering them changes the generated values.
np.random.seed(100)

synt_consumers = {
    'postcodeid': list(range(N_CONSUMERS)),
    'income': np.abs(np.random.normal(80000, 20000, N_CONSUMERS)),
    'population': np.random.uniform(4000, 7000, N_CONSUMERS),
    'vehicle': np.random.randint(2, size=N_CONSUMERS),
    'flats': np.random.uniform(10, 70, N_CONSUMERS),
}
synt_stores = {
    'shopid': list(range(N_SHOPS)),
    'size': np.abs(np.random.normal(500, 200, N_SHOPS)),
    'smth': np.random.randint(1, 4, N_SHOPS),  # some other store characteristics
    'smth2': np.random.randint(5, 9, N_SHOPS),
}

# All columns are truncated to integers.
consumers_df = pd.DataFrame(synt_consumers).astype(int)
shops_df = pd.DataFrame(synt_stores).astype(int)
consumers, shops = consumers_df['postcodeid'], shops_df['shopid']

# Every (postcode, shop) pair: one row per consumer/alternative combination.
cross_combinations = pd.DataFrame(
    list(product(consumers, shops)),
    columns=['postcodeid', 'shopid'],
)
cross_demographics = consumers_df.merge(cross_combinations, on='postcodeid', how='left')
merged_conshops = cross_demographics.merge(shops_df, on='shopid', how='left')

# Column order matters from here on: util() picks the regressors by position,
# starting at column 9, which is 'km' (the first column added below).
# One synthetic distance per consumer/shop row.
merged_conshops['km'] = np.random.uniform(2, 15, len(merged_conshops))
merged_conshops.head()

merged_conshops['dist*vehicle'] = merged_conshops['km'] * merged_conshops['vehicle']
merged_conshops['log(income)'] = np.log(merged_conshops['income'])
merged_conshops['dist*log(inc)'] = merged_conshops['km'] * merged_conshops['log(income)']
merged_conshops['log(size)'] = np.log(merged_conshops['size'])
merged_conshops['log(size)*log(inc)'] = merged_conshops['log(size)'] * merged_conshops['log(income)']

merged_conshops.head(10)


def util(data, params, feature_start=9):
    '''
    Calculates each agent's utility for every alternative in his/her choice
    set and appends a zero-utility outside option for every postcode.

    data: dataframe (data for estimation). Must contain a 'postcodeid' column;
        the columns from position `feature_start` onward are the regressors.
    params: full array of updating parameters, one per regressor column,
        in column order
    feature_start: positional index of the first regressor column
        (default 9, i.e. the first nine columns are not regressors)

    Returns a new dataframe (the input is not modified) holding the input rows
    with an added 'utility' column, plus one extra row per distinct postcodeid
    with utility 0. Rows are sorted by postcodeid with a stable sort, so inside
    each postcode the input rows keep their order and the outside option is
    last. The previous index is kept as an 'index' column and every missing
    value is replaced by 0.

    NOTE: because of the final fillna(0), all other fields of an outside-option
    row (shopid, income, flats, ...) are 0, so such a row cannot be told apart
    from a real shop with id 0 by 'shopid' alone.

    Raises ValueError when len(params) differs from the number of regressor
    columns.
    '''
    regressors = data.iloc[:, feature_start:]
    if regressors.shape[1] != len(params):
        raise ValueError(
            'expected %d params (one per column from position %d on), got %d'
            % (regressors.shape[1], feature_start, len(params))
        )

    # assign() builds a new frame, so the caller's dataframe stays untouched.
    scored = data.assign(utility=regressors.values.dot(params))

    # Outside option: one extra alternative per postcode, utility fixed at 0.
    postcodes0 = scored['postcodeid'].astype(int).unique()
    outsideData = pd.DataFrame({
        'postcodeid': postcodes0,
        'utility': np.zeros(len(postcodes0)),
    })

    dataWithUtilities = (
        pd.concat([scored, outsideData], sort=False)
        # mergesort is stable: ties on postcodeid keep their concat order, which
        # makes the position of the outside option inside each group well defined.
        .sort_values(by='postcodeid', kind='mergesort')
        .reset_index()
        .fillna(0)
    )  # data with all necessary utilities

    return dataWithUtilities

# Starting values for the parameter vector: 0, 1, ..., 5
params = list(range(6))

# Utilities for every consumer/shop row of the synthetic data
dataUtil = util(merged_conshops, params)

def data_revenue(data, params):
    '''
    Calculates the revenue of every alternative in each consumer's choice set.

    For each postcodeid the utilities are min-max scaled to [0, 1], turned into
    choice probabilities with exp(u) / (sum(exp(u)) + 1e-3), and multiplied by
    params[-1] * income * flats.

    data: estimation data, one row per consumer/alternative pair, with
        'postcodeid', 'utility', 'income' and 'flats' columns
    params: full array of updating params; only the last entry is used here,
        as the multiplier on income * flats * probability
        NOTE(review): the caller passes the same vector it gives to util(), so
        the last entry appears to serve two roles -- confirm this is intended.
    Output: the input data (returned unchanged, same object) and revenue_array,
        a 1-D array with one revenue per row. Entries are grouped by postcodeid
        in order of first appearance and keep row order inside each group.
        An empty input gives an empty array.

    Rows are grouped with pandas groupby, which leaves out rows whose
    postcodeid is missing; none are expected in the estimation data.
    '''
    revenue_share = params[-1]
    revenue_chunks = []

    # sort=False: consumers are visited in order of first appearance and the
    # rows of each consumer keep their original order.
    for _, alternatives in data.groupby('postcodeid', sort=False):
        utilities = alternatives['utility'].to_numpy(dtype=float)
        lowest = utilities.min()
        spread = utilities.max() - lowest
        if spread == 0:
            # Single alternative or all utilities equal: the min-max scaling
            # would be 0/0 (NaN). Every scaled utility is 0 in that case,
            # which gives equal choice probabilities.
            scaled = np.zeros_like(utilities)
        else:
            scaled = (utilities - lowest) / spread

        # The small constant in the denominator means the probabilities of a
        # consumer sum to slightly less than 1.
        weights = np.exp(scaled)
        probabilities = weights / (weights.sum() + 1e-3)

        incomes = alternatives['income'].to_numpy(dtype=float)
        households = alternatives['flats'].to_numpy(dtype=float)
        revenue_chunks.append(revenue_share * incomes * households * probabilities)

    if not revenue_chunks:
        # np.concatenate rejects an empty list; no rows means no revenues.
        return data, np.array([], dtype=float)

    revenues_array = np.concatenate(revenue_chunks)
    return data, revenues_array

ModuleNotFoundError: No module named 'numpy'

In [None]:
# data_revenue hands back the frame it was given together with the revenues
dataUtil, revenue_array = data_revenue(data=dataUtil, params=params)
print(dataUtil, revenue_array)

NameError: name 'data_revenue' is not defined