## Knjižnice in spremenljivke


In [86]:
import pandas as pd
import folium as f
from datetime import datetime
import collections
import matplotlib.pyplot as plt
import plotly.express as px
from dateutil.relativedelta import relativedelta
import plotly.offline as pyo

%store -r comm_data
%store -r cars
%store -r population
plt.style.use('seaborn')
pyo.init_notebook_mode(connected=True)

cars['B-Datum prve registracije vozila'] = pd.to_datetime(cars['B-Datum prve registracije vozila'], format='%d.%m.%Y')
cars['P13-Vrsta goriva (opis)'].replace('Ni goriva', 'Elektrika')


The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.



0          Dizel
1         Bencin
2         Bencin
3         Bencin
5          Dizel
           ...  
475864    Bencin
475866    Bencin
475867     Dizel
475871     Dizel
475872     Dizel
Name: P13-Vrsta goriva (opis), Length: 346465, dtype: object

### Porazdelitev avtov na leto


In [80]:
import plotly.express as px

years = list([x.year for x in cars['B-Datum prve registracije vozila']])
years = filter(lambda x: x >= 1975, years)
years = dict(sorted(collections.Counter(years).items(), reverse=True))

fig = px.bar(x=list(years.keys()), y=list(years.values()))
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Number of registered cars',
    title='Number of registered cars by year',
    xaxis=dict(range=[1975, 2023])
)
fig.update_layout(dict( height=800))

pyo.iplot(fig)


In [81]:
import plotly.graph_objects as go

cumulative_counts = []
cumulative_count = 0
for year, count in list(years.items())[::-1]:
    cumulative_count += count
    cumulative_counts.append(cumulative_count)



# Reverse the order of years

# Create the stacked area chart
fig = go.Figure(data=[go.Scatter(x=list(years.keys())[::-1], y=cumulative_counts, mode='lines', stackgroup='one')])

# Customize the chart layout
fig.update_layout(
    title='',
)
fig.update_layout(dict( height=800))

# Show the stacked area chart
pyo.iplot(fig)


### Najpopularnejša znamka


In [82]:
car_brands = {
    "VOLKSWAGEN": "Nemčija",
    "OPEL": "Nemčija",
    "FORD": "Združene države",
    "BMW": "Nemčija",
    "MERCEDES BENZ": "Nemčija",
    "AUDI": "Nemčija",
    "RENAULT": "Francija",
    "CITROEN": "Francija",
    "PEUGEOT": "Francija",
    "ŠKODA": "Češka republika",
    "FIAT": "Italija",
    "KIA": "Južna Koreja",
    "HYUNDAI": "Južna Koreja",
    "SEAT": "Španija",
    "VOLVO": "Švedska",
    "DACIA": "Romunija",
    "NISSAN": "Japonska",
    "MAZDA": "Japonska",
    "TOYOTA": "Japonska",
    "SUZUKI": "Japonska",
    "HONDA": "Japonska",
}

In [83]:
brand = collections.Counter(cars['D1-Znamka'])
# Convert the dictionary to a DataFrame
df = pd.DataFrame({'brand': list(brand.keys()), 'value': list(brand.values())})

# Compute the total value of all brands
total_value = df['value'].sum()
# Compute the percentage of each brand
df['country'] = df['brand'].map(car_brands).fillna("Preostale države")
sum_other = df.loc[df['country'] == 'Preostale države','value'].sum()
df = df[df['country'] != 'Preostale države'].copy()

new_row = [{'brand': 'Preostale znamke', 'value': sum_other,'country':'Drugo'}]
df = pd.concat([df, pd.DataFrame(new_row)], ignore_index=True)
df['percent'] = (df['value'] / total_value) * 100


# Create the hover text to display in each rectangle
df['text'] = df['brand'] + '<br>' + df['percent'].round(2).astype(str) + '%'
# Set the Seaborn color palette
colors = sns.color_palette('husl', n_colors=len(df))
# Convert the RGB colors to Plotly color strings
color_strings = ['rgb({},{},{})'.format(int(c[0]*255), int(c[1]*255), int(c[2]*255)) for c in colors]
# Create the treemap using Plotly
fig = px.treemap(df, path=[ 'country','text'], values='value')

fig.update_layout(dict( height=800))
pyo.iplot(fig)

### Delež goriv na leto 

In [84]:
import plotly.graph_objects as go

cars['B-Datum prve registracije vozila'] = pd.to_datetime(cars['B-Datum prve registracije vozila'], format='%d.%m.%Y')
cars['P13-Vrsta goriva (opis)'].replace('Ni goriva', 'Elektrika')

years_fuel_dict = dict()
gasoline_ratios= list()
diesel_ratios= list()
electricity_ratios= list()
for y in range(1975, 2022):
    car_fuel = cars[cars['B-Datum prve registracije vozila'].dt.year == y]
    car_fuel = collections.Counter(car_fuel['P13-Vrsta goriva (opis)'])
    for key in ['Bencin', 'Dizel', 'Ni goriva']:
        if key is None or key not in car_fuel:
            car_fuel[key] = 0;
    total = car_fuel['Bencin']+ car_fuel['Dizel']+ car_fuel['Ni goriva'];
    if total!=0:
        gasoline_ratios.append(car_fuel['Bencin']/total*100)
        diesel_ratios.append(car_fuel['Dizel']/total*100)
        electricity_ratios.append(car_fuel['Ni goriva']/total*100)
        years_fuel_dict[y]= [car_fuel['Bencin']/total*100, car_fuel['Dizel']/total*100, car_fuel['Ni goriva']/total*100]



trace1 = go.Bar(x=list(years_fuel_dict.keys()), y=gasoline_ratios, name='Benzin')
trace2 = go.Bar(x=list(years_fuel_dict.keys()), y=diesel_ratios, name='Dizel')
trace3 = go.Bar(x=list(years_fuel_dict.keys()), y=electricity_ratios, name='Elektrika')

layout = go.Layout(
    title='Razmerje goriv v Sloveniji',
    barmode='stack',
    xaxis=dict(
        tickmode='array',
        tickvals=list(years_fuel_dict.keys()),
        ticktext=[str(year) + ' ' * 8 for year in years_fuel_dict.keys()]  # Adjust the spacing as needed
    ),

    margin=dict(
        l=80,  # Adjust the left margin value as needed
        r=50,
        b=50,
        t=80,
        pad=4
    )
)

# Create the figure
fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)
fig.update_layout(dict( height=1000))              

# Show the figure
pyo.iplot(fig)


In [87]:
import plotly.express as px
import numpy as np
cars['P12-Nazivna moc'] = pd.to_numeric(cars['P12-Nazivna moc'], errors='coerce')
# Remove rows where 'column_name' is NaN
cars = cars.dropna(subset=['P12-Nazivna moc'])
cars['V8-Kombinirana poraba goriva'] = pd.to_numeric(cars['V8-Kombinirana poraba goriva'], errors='coerce')
# Remove rows where 'column_name' is NaN
cars = cars.dropna(subset=['V8-Kombinirana poraba goriva'])

cars['V8-Kombinirana poraba goriva'] = cars['V8-Kombinirana poraba goriva'].astype(float)
cars['P12-Nazivna moc'] = cars['P12-Nazivna moc'].astype(float)
cars['C-Starost uporabnika vozila']
# Get a boolean series representing which row satisfies the condition
mask = cars['P13-Vrsta goriva (opis)'].isin(['Bencin', 'Dizel'])
# Filter the dataframe using the boolean series
filtered_cars = cars[mask]
jitter = 0.0
filtered_cars = filtered_cars[filtered_cars['V8-Kombinirana poraba goriva'] != 0]
filtered_cars['V8-Kombinirana poraba goriva'] = filtered_cars['V8-Kombinirana poraba goriva'] + np.random.randn(len(filtered_cars['V8-Kombinirana poraba goriva'])) *jitter
filtered_cars['P12-Nazivna moc'] = filtered_cars['P12-Nazivna moc'] + np.random.randn(len(filtered_cars['P12-Nazivna moc']))*jitter**2
# Scatter plot of nominal power vs combined fuel consumption with color by fuel type

# Define the age groups and their corresponding ranges
age_groups = {
    'Boomers': (50, 70),
    'Millennials': (25, 40),
    'Gen Z': (1, 20)
}
color_map = {
    'Gen Z': 'blue',
    'Millennials': 'green',
    'Boomers': 'red'
}
# Define the bin edges
bin_edges = [0,25,50,100]

# Define the bin labels
bin_labels = ['<25', '25<50', '<100']

# Discretize the 'C-Starost uporabnika vozila' column into age groups
filtered_cars['Age Group'] = pd.cut(filtered_cars['C-Starost uporabnika vozila'], bins=bin_edges, labels=bin_labels)



fig = px.scatter(filtered_cars, x="P12-Nazivna moc", y="V8-Kombinirana poraba goriva", color="Age Group",labels={"": "Starostna skupina"})
fig.update_yaxes(range=[0, 20])
fig.update_layout(title='Moč avtomobila v primerjavi s kombinirano porabo goriva',
                   xaxis_title='Moč avtomobila (KW)',
                   yaxis_title='Kombinirana poraba goriva (l/100km)')
fig.update_layout(dict( height=1000))        



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

