In [22]:
import pandas as pd
from pandas.api.types import is_numeric_dtype
from tabulate import tabulate 
import logging
import os
import datetime
from datetime import date

In [2]:

logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))

In [3]:
class OneLineExceptionFormatter(logging.Formatter):
    def formatException(self, exc_info):
        result = super().formatException(exc_info)
        return repr(result)
 
    def format(self, record):
        result = super().format(record)
        if record.exc_text:
            result = result.replace("\n", "")
        return result
 
handler = logging.StreamHandler()
formatter = OneLineExceptionFormatter(logging.BASIC_FORMAT)
handler.setFormatter(formatter)
root = logging.getLogger()
root.setLevel(os.environ.get("LOGLEVEL", "INFO"))
root.addHandler(handler)

In [4]:
class CarsJSON:

    def __init__(self, path):
        self.path = path
    
    def read_json(self):
        ''' Read json as pandas dataset'''   
        df = pd.read_json(self.path)
        return df

In [5]:
df = CarsJSON("cars.json")

In [28]:
cars_pd_dataset = df.read_json()

In [8]:
cars_pd_dataset.head()

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,2200-12-12,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,2200-12-12,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,2200-12-12,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,2200-12-12,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,2200-12-12,USA


In [29]:
class Cars:

    def __init__(self, data):
        '''Define instance attributes'''
        self.data = data
        self.name = data['Name']
        self.weight = data['Weight_in_lbs']
        self.horsepower = data['Horsepower']
        self.year = pd.to_datetime(data['Year'])
    
    def unique_cars(self):
        '''Print the number of unique cars'''
        print("The number of unique cars is", self.name.nunique())

    def mean_horsepower(self):
        '''Print the mean horsepower'''
        try: 
            if not isinstance(self.horsepower, (int, float)):
                          raise ValueError("Horsepower must be float or int") 
        
        except:
            if self.horsepower.lt(0).any():
                 raise ValueError("The cannot be negative values for horsepower")  
             
            
        print("Mean horsepower is", round(self.horsepower.mean(), 2))

    def top5_heaviest_cars(self):
        '''Print a table with the 5 heaviest cars'''
        try: 
            if not isinstance(self.weight, (int, float)):
                          raise ValueError("Car weight must be float or int") 
        
        except:
            if self.weight.lt(0).any():
                 raise ValueError("There cannot be negative values for car weight")  
             
            
        df1 = pd.DataFrame({
              'Weight_in_lbs': self.weight.nlargest(5)
        })

        inner_join = pd.merge(df1, self.data, on = 'Weight_in_lbs', how = 'inner')
        print("Top heaviest cars")
        print(tabulate(inner_join[['Name', 'Weight_in_lbs']], 
                       headers = ['Name', 'Weight_in_lbs'], tablefmt = 'fabcy_grid', showindex = False))
        
    def cars_made_by_manufacturer(self):
        '''Print a table with number of cars by their origin'''
        pivot = pd.pivot_table(self.data, values = 'Name', index = 'Origin', aggfunc='count')
        pivot = pivot.sort_values(by='Origin', ascending=False)
        print("Number of cars by origin")
        print(tabulate(pivot, headers=['Origin', 'N cars made'], tablefmt='fancy_grid'))

    def cars_made_by_year(self):
        '''Print a table with number of cars made each year'''    
        self.data['Year'] = pd.to_datetime(self.data['Year']).dt.strftime('%Y')
        pivot = pd.pivot_table(self.data, values = 'Name', index = 'Year', aggfunc='count')
        pivot = pivot.sort_values(by='Year', ascending=False)
        print("Number of cars by origin")
        print(tabulate(pivot, headers=['Year', 'N cars made'], tablefmt='fancy_grid'))
            
            

In [30]:
cars_pd_dataset = Cars(cars_pd_dataset)

In [31]:
cars_pd_dataset.unique_cars()
cars_pd_dataset.mean_horsepower()
cars_pd_dataset.top5_heaviest_cars()
cars_pd_dataset.cars_made_by_manufacturer()
cars_pd_dataset.cars_made_by_year()

The number of unique cars is 311
Mean horsepower is 105.08
Top heaviest cars
Name                        Weight_in_lbs
------------------------  ---------------
pontiac safari (sw)                  5140
chevrolet impala                     4997
dodge monaco (sw)                    4955
mercury marquis brougham             4952
buick electra 225 custom             4951
Number of cars by origin
╒══════════╤═══════════════╕
│ Origin   │   N cars made │
╞══════════╪═══════════════╡
│ USA      │           254 │
├──────────┼───────────────┤
│ Japan    │            79 │
├──────────┼───────────────┤
│ Europe   │            73 │
╘══════════╧═══════════════╛
Number of cars by origin
╒════════╤═══════════════╕
│   Year │   N cars made │
╞════════╪═══════════════╡
│   1982 │            61 │
├────────┼───────────────┤
│   1980 │            29 │
├────────┼───────────────┤
│   1979 │            29 │
├────────┼───────────────┤
│   1978 │            36 │
├────────┼───────────────┤
│   1977 │           