In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objs as go
import plotly.express as px
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(folder, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## 1. Examine the data and set goals

The goal of this analysis is to find a cheap and good electric car. The electric car market keeps getting bigger. Hybrid cars are currently the most popular, but I expect that to change.

So let's start by creating a pandas DataFrame to store the data.

In [None]:
# Load the electric-car CSV from the Kaggle input directory into a DataFrame.
csv_path = '../input/cheapest-electric-cars/Cheapestelectriccars-EVDatabase.csv'
cars = pd.read_csv(csv_path)

Selecting the first five rows

In [None]:
# Preview the first five rows of the raw frame.
cars.head(n=5)

... and the last five rows

In [None]:
# Preview the last five rows of the raw frame.
cars.tail(n=5)

Using the describe and info methods, and examining null values

In [None]:
# Summary statistics for the frame as loaded, before any unit suffixes are
# stripped or columns are converted to numeric types.
cars.describe()

In [None]:
# Print each column's dtype and non-null count to see which columns need conversion.
cars.info()

In [None]:
# Count the missing values in each column (isna is the same check as isnull).
cars.isna().sum()

I'm going to rename the columns and drop the PriceinUK column, since it's redundant.

In [None]:

# Rename every column positionally to a snake_case name. The list must stay in
# the same order as the columns of the loaded CSV.
cars.columns = [
    'name',
    'subtitle',
    'acceleration_in_sec',
    'top_speed_km_per_h',
    'range_km',
    'efficiency_wh_per_hour',      # values carry a 'Wh/km' suffix despite the name
    'fast_charge_speed_km_per_h',
    'drive',
    'number_of_seats',
    'price_in_euros',
    'price_to_drop',               # redundant price column, dropped in the next cell
]

In [None]:
# Remove the redundant second price column; only the euro price is kept.
cars = cars.drop(columns=['price_to_drop'])

In [None]:
# Take a copy of the columns that are meant to hold numbers. The values are
# still in their raw, unconverted form at this point.
numeric_columns = cars[[
    'acceleration_in_sec',
    'top_speed_km_per_h',
    'range_km',
    'efficiency_wh_per_hour',
    'fast_charge_speed_km_per_h',
    'number_of_seats',
    'price_in_euros',
]]

In [None]:
# Display the selected columns; they still hold the raw values because the
# numeric conversion happens in a later cell.
numeric_columns

Converting object-type columns to numeric

In [None]:
# Strip the unit suffix from each measurement column and cast it to a number.
# regex=False makes every replacement a literal substring match.
cars.acceleration_in_sec = (
    cars.acceleration_in_sec.str.replace('sec', '', regex=False).astype('float')
)

# Fix: the converted top speed used to be computed and then discarded, which
# left the column as strings. Assign the result back.
cars.top_speed_km_per_h = (
    cars.top_speed_km_per_h.str.replace('km/h', '', regex=False).astype('int')
)

cars.range_km = cars.range_km.str.replace('km', '', regex=False).astype('int')

# Note: the values in this column are in Wh/km even though the column name
# says "per hour".
cars.efficiency_wh_per_hour = (
    cars.efficiency_wh_per_hour.str.replace('Wh/km', '', regex=False).astype('int')
)

# Fix: the suffix used to be stripped without any numeric cast, so the column
# stayed a string and was left out of describe()/corr().
# errors='coerce' turns any entry that is not a plain number into NaN instead
# of raising (e.g. a placeholder for cars without fast charging -- TODO confirm
# against the data).
cars.fast_charge_speed_km_per_h = pd.to_numeric(
    cars.fast_charge_speed_km_per_h.str.replace('km/h', '', regex=False).str.strip(),
    errors='coerce',
)

# Prices use ',' as the thousands separator. Turning it into '.' expresses the
# price in thousands of euros, which is the unit the later plots assume.
cars.price_in_euros = (
    cars.price_in_euros
    .str.replace('€', '', regex=False)
    .str.replace(',', '.', regex=False)
    .astype('float')
)

We still have null values in the price-in-euros column. I'm going to fill the missing prices with the mean price.

In [None]:
# Replace missing prices with the column mean, rounded to two decimals.
cars['price_in_euros'] = cars['price_in_euros'].fillna(
    round(cars['price_in_euros'].mean(), 2)
)

Now let's use describe method

In [None]:
# Summary statistics again, this time after the type-conversion and
# price-imputation cells above have run.
cars.describe()

Heatmap and correlation

In [None]:
# Heatmap of the pairwise correlations between the numeric columns.
# The frame still contains text columns (name, subtitle, drive), and
# DataFrame.corr() raises on those in pandas >= 2.0, so select the numeric
# columns explicitly. This works on every pandas version.
sns.heatmap(cars.select_dtypes(include='number').corr())

In [None]:
# Pairwise correlation table for the numeric columns only. Text columns are
# excluded explicitly because DataFrame.corr() raises on them in pandas >= 2.0.
cars.select_dtypes(include='number').corr()

We can see a strong positive correlation between price and range. We can also see two strong negative correlations: between acceleration time and range, and between price and acceleration time. This means that as the acceleration time decreases (the car gets quicker), the price increases. It also means that as the range increases, the acceleration time decreases.

## 2. Data visualizations

Scatter plot:

In [None]:
# Range vs. acceleration time, coloured by price, with a single OLS trendline
# fitted across all points.
fig = px.scatter(
    cars,
    x='range_km',
    y='acceleration_in_sec',
    color='price_in_euros',
    hover_data=['name'],
    trendline='ols',
    trendline_scope='overall',
)

# Centre the title horizontally (x=0.5), anchor its bottom edge at y=0.92,
# and lay the legend out horizontally.
fig.update_layout(
    title=dict(
        text='eletric cars scatter plot',
        x=0.5,
        y=0.92,
        xanchor='center',
        yanchor='bottom',
    ),
    legend=dict(orientation='h'),
)

fig.show()

Acceleration in this table is expressed in seconds: how many seconds the car needs to get from 0 to 100 km/h.
For example, an acceleration time of 10 sec is worse than an acceleration time of 5 sec. But there's also a catch: when the acceleration time decreases, the total range you can drive increases. And when the acceleration time decreases and the range increases, the price increases too. It's quite logical. You want a fast, long-range electric car? You have to pay more.

Let's group the cars by range

In [None]:
# Split the cars into three range bands (bounds are inclusive, in km).
# NOTE: the second and third variable names say 600/601, but the real cut-off
# is 650/651 km, which is what the legend labels show. The names are kept
# unchanged because other cells may reference them.
cars_range_from_90_to_400 = cars.loc[cars.range_km.between(90, 400)]
cars_range_from_401_to_600 = cars.loc[cars.range_km.between(401, 650)]
cars_range_from_601_to_1000 = cars.loc[cars.range_km.between(651, 1000)]

# Fix: apply the styles BEFORE creating the figure. Style settings are picked
# up when the figure and axes are created, so setting them afterwards made the
# first run render with the previous style and only re-runs with this one.
plt.style.use('fivethirtyeight')
sns.set_style('white')

fig, ax = plt.subplots(figsize=(15, 5))
ax.set_ylim([15, 250])
ax.set_xlim([90, 1000])

# One bar series per range band. The label is attached to the series itself,
# so the legend cannot get out of step with the plotting order.
range_bands = [
    ('90-400 km', cars_range_from_90_to_400),
    ('401-650 km', cars_range_from_401_to_600),
    ('651-1000 km', cars_range_from_601_to_1000),
]
for band_label, band in range_bands:
    ax.bar(band['range_km'], band['price_in_euros'], width=6, label=band_label)

spines = ['right', 'left', 'bottom', 'top']  # hide the frame around the plot
for spine in spines:
    ax.spines[spine].set_visible(False)

ax.xaxis.set_label_text('range (km)', fontsize=18, weight='bold')
ax.yaxis.set_label_text('price (thousands of €)', fontsize=18, weight='bold')
ax.set_title('Range categories:', weight='bold')
ax.legend(loc="upper center")

# Render the figure explicitly so the cell does not echo the Legend repr.
plt.show()

On the plot above, some of the cars with a range between 90 and 400 km are more expensive than cars with a range between 400 and 650 km. Why?

In [None]:
# Short-range cars (90-400 km) priced above 150 thousand euros.
cars_range_from_90_to_400[cars_range_from_90_to_400['price_in_euros'].gt(150)]

They're all Porsches. These cars are very exclusive, and they're usually more expensive than other cars.

In [None]:
# From the 90-400 km band, keep cars priced below 150 (thousand euros), sort
# them by ascending price and take the last ten rows, i.e. the ten most
# expensive cars under that limit.
top_10 = (
    cars_range_from_90_to_400[cars_range_from_90_to_400['price_in_euros'].lt(150)]
    .sort_values(by='price_in_euros')
    .tail(10)
)

In [None]:
# Show the ten selected cars, ordered by ascending price.
top_10

## 3. Conclusions

I think these are the 10 best options to buy if you have less than 150 thousand €. If you have more, in my opinion the winner of this competition is the **Tesla Roadster**. I came to this conclusion because they all have quite a long range (for me this is the most important factor: you should be driving a car, not charging it all day). They also have quite good acceleration. As the number of charging stations keeps growing, it will become more sensible to buy cars with a shorter range and a higher fast-charge speed. But for now, that's not such a good idea.