In [2]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import bokeh
import plotly

from bokeh.plotting import figure, output_file, show, ColumnDataSource, curdoc, output_notebook
from bokeh.models import ColumnDataSource, Slider, HoverTool, CustomJS
from bokeh.layouts import column, row, Spacer, layout

In [3]:
df = pd.read_csv('../data/car_prices.csv')

In [4]:
import re

def has_no_capital_letters(text):
    return not bool(re.search(r'[A-Z]', text))

bad_make_labels=[make for make in df['make'].dropna().unique() if has_no_capital_letters(make)]
make_labels_dict={bad_make_label:bad_make_label.title() for bad_make_label in bad_make_labels}
make_labels_dict

{'lincoln': 'Lincoln',
 'bmw': 'Bmw',
 'ford': 'Ford',
 'mitsubishi': 'Mitsubishi',
 'smart': 'Smart',
 'chevrolet': 'Chevrolet',
 'chrysler': 'Chrysler',
 'pontiac': 'Pontiac',
 'landrover': 'Landrover',
 'cadillac': 'Cadillac',
 'land rover': 'Land Rover',
 'mercedes': 'Mercedes',
 'mazda': 'Mazda',
 'toyota': 'Toyota',
 'lexus': 'Lexus',
 'gmc truck': 'Gmc Truck',
 'honda': 'Honda',
 'nissan': 'Nissan',
 'porsche': 'Porsche',
 'dodge': 'Dodge',
 'subaru': 'Subaru',
 'oldsmobile': 'Oldsmobile',
 'hyundai': 'Hyundai',
 'jeep': 'Jeep',
 'dodge tk': 'Dodge Tk',
 'acura': 'Acura',
 'volkswagen': 'Volkswagen',
 'suzuki': 'Suzuki',
 'kia': 'Kia',
 'audi': 'Audi',
 'gmc': 'Gmc',
 'maserati': 'Maserati',
 'mazda tk': 'Mazda Tk',
 'mercury': 'Mercury',
 'buick': 'Buick',
 'hyundai tk': 'Hyundai Tk',
 'mercedes-b': 'Mercedes-B',
 'vw': 'Vw',
 'chev truck': 'Chev Truck',
 'ford tk': 'Ford Tk',
 'plymouth': 'Plymouth',
 'ford truck': 'Ford Truck',
 'airstream': 'Airstream',
 'dot': 'Dot'}

In [5]:
df['make']=df['make'].apply(lambda x:make_labels_dict[x] if x in make_labels_dict else x)
make_labels_dict_2={
    'Bmw':'BMW',
    'Ford Tk':'Ford Truck',
    'Gmc':'GMC',
    'Gmc Truck':'GMC Truck',
    'Landrover':'Land Rover',
    'Mercedes-B':'Mercedes-Benz',
    'Mercedes':'Mercedes-Benz',
    'Vw':'Volkswagen',
}

df['make']=df['make'].apply(lambda x:make_labels_dict_2[x] if x in make_labels_dict_2 else x)

df['make']=df['make'].str.replace(' Tk', ' Truck',regex=True)

df['make'].unique()

array(['Kia', 'BMW', 'Volvo', 'Nissan', 'Chevrolet', 'Audi', 'Ford',
       'Hyundai', 'Buick', 'Cadillac', 'Acura', 'Lexus', 'Infiniti',
       'Jeep', 'Mercedes-Benz', 'Mitsubishi', 'Mazda', 'MINI',
       'Land Rover', 'Lincoln', 'Jaguar', 'Volkswagen', 'Toyota',
       'Subaru', 'Scion', 'Porsche', nan, 'Dodge', 'FIAT', 'Chrysler',
       'Ferrari', 'Honda', 'GMC', 'Ram', 'Smart', 'Bentley', 'Pontiac',
       'Saturn', 'Maserati', 'Mercury', 'HUMMER', 'GMC Truck', 'Saab',
       'Suzuki', 'Oldsmobile', 'Isuzu', 'Dodge Truck', 'Geo',
       'Rolls-Royce', 'Mazda Truck', 'Hyundai Truck', 'Daewoo',
       'Chev Truck', 'Ford Truck', 'Plymouth', 'Tesla', 'Airstream',
       'Dot', 'Aston Martin', 'Fisker', 'Lamborghini', 'Lotus'],
      dtype=object)

In [6]:
df.columns

Index(['year', 'make', 'model', 'trim', 'body', 'transmission', 'vin', 'state',
       'condition', 'odometer', 'color', 'interior', 'seller', 'mmr',
       'sellingprice', 'saledate'],
      dtype='object')

In [35]:
sample = df[(df['make'] == 'Kia') & (df['model'] == 'Sorento')].groupby('year')['sellingprice'].agg(lambda x: x.mean())
sample = sample.sort_index()
sample

year
2000    11300.000000
2003     2019.000000
2004     2394.696970
2005     3014.858491
2006     3581.250000
2007     4582.142857
2008     5595.794393
2009     6583.088235
2011    11398.430000
2012    14311.111111
2013    15961.650485
2014    17397.446694
2015    20339.476496
Name: sellingprice, dtype: float64

In [36]:
all_years = pd.Series(index=range(sample.index.min(), sample.index.max() + 1))
sample = sample.reindex(all_years.index)
print("Updated Series with missing years as NaN:")
print(sample)

Updated Series with missing years as NaN:
2000    11300.000000
2001             NaN
2002             NaN
2003     2019.000000
2004     2394.696970
2005     3014.858491
2006     3581.250000
2007     4582.142857
2008     5595.794393
2009     6583.088235
2010             NaN
2011    11398.430000
2012    14311.111111
2013    15961.650485
2014    17397.446694
2015    20339.476496
Name: sellingprice, dtype: float64


In [38]:
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, RangeSlider, CustomJS
from bokeh.layouts import column

# Create DataFrame and ColumnDataSource
sub_df = pd.DataFrame({'year': sample.index, 'sellingprice': sample.values})
source = ColumnDataSource(sub_df)

# Create the plot
plot = figure(title="Selling Price Over Years", x_axis_label='Year', y_axis_label='Selling Price')

# Add a line renderer
line = plot.line('year', 'sellingprice', source=source, line_width=2)

# Create a range slider for selecting the year range
start_year = sub_df['year'].min()
end_year = sub_df['year'].max()
range_slider = RangeSlider(start=start_year, end=end_year, value=(start_year, end_year), step=1, 
                           title="Year Range")

# Define JavaScript callback function for the range slider
callback = CustomJS(args=dict(source=source, range_slider=range_slider), code="""
    var data = source.data;
    var start = range_slider.value[0];
    var end = range_slider.value[1];
    var sellingprice = data['sellingprice'];
    var year = data['year'];
    var new_sellingprice = [];
    var new_year = [];
    for (var i = 0; i < year.length; i++) {
        if (year[i] >= start && year[i] <= end) {
            new_sellingprice.push(sellingprice[i]);
            new_year.push(year[i]);
        }
    }
    source.data['sellingprice'] = new_sellingprice;
    source.data['year'] = new_year;
    source.change.emit();
""")

range_slider.js_on_change('value', callback)

# Create layout
layout = column(plot, range_slider)

# Display the plot and range slider in the HTML file
output_file("selling_price_over_years.html")
show(layout)

gio: file:///home/dat-21127240/BigData/Big-Data-With-Seaborn-Bokeh-Plotly/Research plan demonstration/src/selling_price_over_years.html: No application is registered as handling this file


In [8]:
group = ['year', 'make', 'mmr', 'sellingprice']
df_need = pd.DataFrame(df, columns=group)
data = df_need.groupby(['year', 'make'])

In [46]:
result = data.mean().reset_index()
result

Unnamed: 0,year,make,mmr,sellingprice
0,1984,Chevrolet,3300.000000,3100.000000
1,1985,Chevrolet,2162.500000,3875.000000
2,1986,Chevrolet,5000.000000,8600.000000
3,1986,Mercedes-Benz,1625.000000,1400.000000
4,1987,Mercedes-Benz,550.000000,300.000000
...,...,...,...,...
884,2015,Smart,9750.000000,9166.666667
885,2015,Subaru,24676.694915,24541.313559
886,2015,Toyota,19977.215190,19302.995781
887,2015,Volkswagen,17110.317460,15184.831461


In [47]:
temp1 = df_need.groupby('year')['mmr'].mean()
temp2 = df_need.groupby('year')['sellingprice'].mean()
result_fluc = pd.concat([temp1, temp2], axis=1)
result_fluc.columns = ['MMR', 'Sellingprice']
result_fluc = result_fluc.reset_index()
result_fluc

Unnamed: 0,year,MMR,Sellingprice
0,1982,4925.0,12750.0
1,1983,6050.0,5250.0
2,1984,2185.0,2160.0
3,1985,3300.0,3520.0
4,1986,2359.090909,2538.636364
5,1987,1793.75,1512.5
6,1988,2990.909091,3168.181818
7,1989,1683.75,1620.0
8,1990,1694.387755,1435.714286
9,1991,1195.895522,943.656716


In [48]:
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.models import ColumnDataSource, Slider, HoverTool, CustomJS
from bokeh.layouts import column, row, Spacer, layout

In [49]:
new_data = {
    "year": result_fluc['year'],
    "mmr": result_fluc['MMR'],
    "selling_price": result_fluc['Sellingprice']
} 

# Load data
source = ColumnDataSource(data=data)

# Create a figure
p = figure(title="Fluctuation Chart of Average Selling Price and MMR by Year", x_axis_label="Year", y_axis_label="Price")

# Add lines
p.line(x="year", y="mmr", color="blue", source=source, line_width=2,)
p.line(x="year", y="selling_price", color="red", source=source, line_width=2,)

# Add a hover tool
hover = HoverTool(tooltips=[
    ("Year", "@year"),
    ("MMR", "@mmr{0.00}"),
    ("Selling Price", "@selling_price{0.00}"),
])
p.add_tools(hover)

# Set up layout and output
output_file("bokeh_chart.html")
show(p)

ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : x='year' [closest match: 'year_make'], y='mmr' [closest match: 'mmr_std'] {renderer: GlyphRenderer(id='p1888', ...)}
ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : x='year' [closest match: 'year_make'], y='selling_price' [closest match: 'sellingprice_std'] {renderer: GlyphRenderer(id='p1897', ...)}


In [50]:
result

gio: file:///home/dat-21127240/BigData/Big-Data-With-Seaborn-Bokeh-Plotly/Research plan demonstration/src/bokeh_chart.html: No application is registered as handling this file


Unnamed: 0,year,make,mmr,sellingprice
0,1984,Chevrolet,3300.000000,3100.000000
1,1985,Chevrolet,2162.500000,3875.000000
2,1986,Chevrolet,5000.000000,8600.000000
3,1986,Mercedes-Benz,1625.000000,1400.000000
4,1987,Mercedes-Benz,550.000000,300.000000
...,...,...,...,...
884,2015,Smart,9750.000000,9166.666667
885,2015,Subaru,24676.694915,24541.313559
886,2015,Toyota,19977.215190,19302.995781
887,2015,Volkswagen,17110.317460,15184.831461


In [51]:
result[result['year'] == 1985]

Unnamed: 0,year,make,mmr,sellingprice
1,1985,Chevrolet,2162.5,3875.0


# Định nghĩa hàm cập nhật dữ liệu cho biểu đồ
def update(attr, old, new):
    selected_year = slider.value
    new_data = result[result['year'] == selected_year]
    source.data = ColumnDataSource(data=new_data).data
    print(new_data)

# Gán hàm cập nhật vào sự kiện thay đổi của thanh trượt
slider.on_change("value", update)

In [52]:

# Đọc dữ liệu từ DataFrame hoặc tạo ColumnDataSource
source = ColumnDataSource(data=result)

# Tạo một biểu đồ cột
p = figure(x_range=result['make'].unique(), title="MMR and Selling Price by Make",
           tools="pan,box_zoom,wheel_zoom,reset,save", width=1000, height=600)

# Vẽ cột cho MMR và giá bán
p.vbar(x='make', top='mmr', width=0.4, source=source, color='green', legend_label='MMR', hover_fill_alpha=0.5, hover_fill_color='lightgreen')
p.vbar(x='make', top='sellingprice', width=0.4, source=source, color='purple', legend_label='Selling Price', hover_fill_alpha=0.5, hover_fill_color='violet')

# Cấu hình biểu đồ
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.xaxis.axis_label = "Make"
p.yaxis.axis_label = "Value"
p.legend.location = "top_left"
p.legend.click_policy = "hide"

# Thêm HoverTool để hiển thị thông tin chi tiết
hover = HoverTool(tooltips=[("Make", "@make"), ("MMR", "@mmr"), ("Selling Price", "@sellingprice")])
p.add_tools(hover)

# Tạo thanh trượt
slider = Slider(start=min(result['year']), end=max(result['year']), value=min(result['year']), step=1, title="Select Year")

callback = CustomJS(args=dict(source=source, slider=slider), code="""
    const year = slider.value;
    const data = source.data;
    const new_data = {
        make: [],
        mmr: [],
        sellingprice: [],
        year: []
    };
    for (var i = 0; i < data['year'].length; i++) {
        if (data['year'][i] == year) {
            new_data['make'].push(data['make'][i]);
            new_data['mmr'].push(data['mmr'][i]);
            new_data['sellingprice'].push(data['sellingprice'][i]);
            new_data['year'].push(data['year'][i]);
        }
    }
    source.data = new_data;
    console.log("Selected year:", year);
""")

hover = HoverTool(tooltips=[("Make", "@make"), ("MMR", "@mmr"), ("Selling Price", "@sellingprice"), ("Year", "@year")])
p.add_tools(hover)

slider.js_on_change("value", callback)

p.xaxis.major_label_orientation = 45

# Tạo layout và hiển thị biểu đồ
p_1 = column(p, slider)
output_file("bar_chart_with_slider.html")
show(p_1)

In [53]:
# Read data
source = ColumnDataSource(data=result)

# Create bar chart
p = figure(x_range=result['make'].unique(), title="MMR and Selling Price by Make",
           tools="pan,box_zoom,wheel_zoom,reset,save", width=1000)

# Draw a bar chart for MMR and selling price and add the HoverTool tool
p.vbar(x='make', top='mmr', width=0.4, source=source, color='blue', legend_label='MMR', hover_fill_alpha=0.5, hover_fill_color='lightblue')
p.vbar(x='make', top='sellingprice', width=0.4, source=source, color='red', legend_label='Selling Price', hover_fill_alpha=0.5, hover_fill_color='salmon')

# Setting
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.xaxis.axis_label = "Make by"
p.yaxis.axis_label = "Value"
p.legend.location = "top_left"
p.legend.click_policy = "hide"

# Add hover to show data in detail
hover = HoverTool(tooltips=[("Make", "@make"), ("MMR", "@mmr"), ("Selling Price", "@sellingprice"), ("Year", "@year")])
p.add_tools(hover)

# Slider
slider = Slider(start=min(result['year']), end=max(result['year']), value=min(result['year']), step=1, title="Select Year")

# Callback
callback = CustomJS(args=dict(source=source, slider=slider), code="""
    const year = slider.value;
    const data = source.data;
    const new_data = {
        make: [],
        mmr: [],
        sellingprice: [],
        year: []
    };
    for (var i = 0; i < data['year'].length; i++) {
        if (data['year'][i] == year) {
            new_data['make'].push(data['make'][i]);
            new_data['mmr'].push(data['mmr'][i]);
            new_data['sellingprice'].push(data['sellingprice'][i]);
            new_data['year'].push(data['year'][i]);
        }
    }
    source.data = new_data;
    source.change.emit();
""")

slider.js_on_change("value", callback)
p.xaxis.major_label_orientation = 45

# Set up layout
layout = column(p, slider)
output_file("bar_chart_with_slider.html")
show(layout)


gio: file:///home/dat-21127240/BigData/Big-Data-With-Seaborn-Bokeh-Plotly/Research plan demonstration/src/bar_chart_with_slider.html: No application is registered as handling this file


gio: file:///home/dat-21127240/BigData/Big-Data-With-Seaborn-Bokeh-Plotly/Research plan demonstration/src/bar_chart_with_slider.html: No application is registered as handling this file


In [54]:
# Read data
source = ColumnDataSource(data=result)

# Create bar chart
p = figure(x_range=result['make'].unique(), title="MMR and Selling Price by Make",
           tools="pan,box_zoom,wheel_zoom,reset,save", width=1000)

# Draw a bar chart for MMR and selling price and add the HoverTool tool
p.vbar(x='make', top='mmr', width=0.4, source=source, color='blue', legend_label='MMR', hover_fill_alpha=0.5, hover_fill_color='lightblue')
p.vbar(x='make', top='sellingprice', width=0.4, source=source, color='red', legend_label='Selling Price', hover_fill_alpha=0.5, hover_fill_color='salmon')

# Setting
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.xaxis.axis_label = "Make by"
p.yaxis.axis_label = "Value"
p.legend.location = "top_left"
p.legend.click_policy = "hide"

# Add hover to show data in detail
hover = HoverTool(tooltips=[("Make", "@make"), ("MMR", "@mmr"), ("Selling Price", "@sellingprice"), ("Year", "@year")])
p.add_tools(hover)

# Slider
slider = Slider(start=min(result['year']), end=max(result['year']), value=min(result['year']), step=1, title="Select Year")

# Callback
callback = CustomJS(args=dict(source=source, slider=slider), code="""
    ...
""")
slider.js_on_change("value", callback)

# Set up layout
p.xaxis.major_label_orientation = 45
layout = column(p, slider)
output_file("bar_chart_with_slider.html")
show(layout)


gio: file:///home/dat-21127240/BigData/Big-Data-With-Seaborn-Bokeh-Plotly/Research plan demonstration/src/bar_chart_with_slider.html: No application is registered as handling this file
