In [16]:
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import random
from src import weekly
from distributions import UniformDistribution

In [3]:
file_to_load = Path.cwd().parent.joinpath('data').joinpath('chipotle.tsv')
food = pd.read_csv("C:/Users/User\Documents\GitHub\ECOPY_23241\data\chipotle.tsv", delimiter='\t')

In [7]:
food

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
...,...,...,...,...,...
4617,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Sour ...",$11.75
4618,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...",$11.75
4619,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...",$11.25
4620,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...",$8.75


In [4]:
def change_price_to_float(input_df):
    food = input_df.copy()
    food['item_price'] = food['item_price'].str.replace('$', '').astype(float)
    return food

In [5]:
food_clear = change_price_to_float(food)

In [15]:
def unique_items_over_ten_dollars(input_df):
    new_df = input_df.copy()
    filtered_df = new_df[new_df["item_price"] > 10]
    # Duplikációk eltávolítása
    unique_items_df = filtered_df.drop_duplicates(subset=["item_name", "choice_description", "item_price"])
    return unique_items_df[["item_name", "choice_description", "item_price"]]

In [16]:
unique_items_over_ten_dollars(food_clear)

Unnamed: 0,item_name,choice_description,item_price
4,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98
5,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",10.98
7,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",11.75
13,Chicken Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...",11.25
23,Chicken Burrito,"[[Tomatillo-Green Chili Salsa (Medium), Tomati...",10.98
...,...,...,...
4593,Carnitas Bowl,"[Roasted Chili Corn Salsa, [Rice, Sour Cream, ...",11.75
4594,Barbacoa Bowl,"[Roasted Chili Corn Salsa, [Pinto Beans, Sour ...",11.75
4607,Steak Burrito,"[Tomatillo Green Chili Salsa, [Rice, Cheese, S...",11.75
4610,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...",11.75


In [13]:
def items_starting_with_s(input_df):
    new_df = input_df.copy()
    selected_items = new_df[new_df['item_name'].str.startswith('S')]
    unique_selected_items_df = selected_items.drop_duplicates(subset=["item_name"])
    return unique_selected_items_df["item_name"].reset_index(drop=True)

In [14]:
items_starting_with_s(food_clear)

0         Side of Chips
1         Steak Burrito
2      Steak Soft Tacos
3            Steak Bowl
4    Steak Crispy Tacos
5      Steak Salad Bowl
6           Steak Salad
7                 Salad
Name: item_name, dtype: object

In [6]:
def generate_quartile(input_df):
    new_df = input_df.copy()
    new_df['Quartile'] = pd.cut(input_df['item_price'], [-1, 10, 20, 30, float('inf')], labels=['low-cost', 'medium-cost', 'high-cost', 'premium'], right=False).astype('object')
    return new_df

In [7]:
food_quartile = generate_quartile(food_clear)

In [48]:
def average_price_in_quartiles(input_df):
    new_df = input_df.copy()
    avg_price_df = new_df.groupby('Quartile')['item_price'].mean().reset_index(drop=True)
    return avg_price_df

In [49]:
average_price_in_quartiles(food_quartile)

0    23.181951
1     5.869593
2    11.853084
3    36.031667
Name: item_price, dtype: float64

In [1]:
def minmaxmean_price_in_quartile(input_df):
    new_df = input_df.copy()
    result = new_df.groupby('Quartile')['item_price'].agg(['min', 'max', 'mean']).reset_index(drop=True)
    result.columns = ['min', 'max', 'mean']
    return result

In [8]:
minmaxmean_price_in_quartile(food_quartile)

Unnamed: 0,min,max,mean
0,21.96,27.75,23.181951
1,1.09,9.39,5.869593
2,10.5,18.5,11.853084
3,32.94,44.25,36.031667


In [23]:
def gen_uniform_mean_trajectories(distribution, number_of_trajectories, length_of_trajectory):
    random.seed(42)
    trajectories = []  # Ebben a listában tároljuk az eredményt

    for _ in range(number_of_trajectories):
        trajectory = []  # Ebben a listában tároljuk a kumulatív átlagokat
        cumulative_sum = 0.0  # Kezdetben a kumulatív átlag nulla

        for _ in range(length_of_trajectory):
            random_value = distribution.gen_random()  # Véletlen szám a megadott eloszlás alapján
            cumulative_sum += random_value  # Frissítjük a kumulatív átlagot
            trajectory.append(cumulative_sum)  # Hozzáadjuk a kumulatív átlagot a belső listához

        trajectories.append(trajectory)  # Hozzáadjuk a belső listát az eredmény listához

    return trajectories

In [21]:
dist = UniformDistribution(random, 0, 1)

In [24]:
gen_uniform_mean_trajectories(dist, 2, 100)

[[0.6394267984578837,
  0.6644375536805507,
  0.9394668720496699,
  1.1626776101984926,
  1.899148824362505,
  2.5758483117854163,
  3.4680278794902617,
  3.5549667121196777,
  3.9768885318049483,
  4.006685751243019,
  4.225323726046622,
  4.730679014149985,
  4.757214983833848,
  4.956052634520497,
  5.60593707230002,
  6.150878552903237,
  6.371319174943933,
  6.960584858819842,
  7.770015315497669,
  7.776514075175729,
  8.582333327008538,
  9.280472721996764,
  9.620723238514756,
  9.776202738326537,
  10.733415810533318,
  11.070010355645945,
  11.162756199026093,
  11.259472575859558,
  12.106966942207018,
  12.71069297357391,
  13.517821246848289,
  14.247553033542108,
  14.783781124996809,
  15.75689688897618,
  16.135431266184533,
  16.68747189745776,
  17.516876561710752,
  18.135396314075,
  18.997103214385778,
  19.57445535964254,
  20.279027195857463,
  20.324851579513126,
  20.552749855164674,
  20.84213781876678,
  20.921929795690406,
  21.154720682051437,
  21.25572211