# MPG Cars

### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [2]:
import pandas as pd
import numpy as np

### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

   ### Step 3. Assign each to a variable called cars1 and cars2

In [4]:
cars1 = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv')
cars2 = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv')
print(cars1.head())
print(cars2.head())

    mpg  cylinders  displacement horsepower  weight  acceleration  model  \
0  18.0          8           307        130    3504          12.0     70   
1  15.0          8           350        165    3693          11.5     70   
2  18.0          8           318        150    3436          11.0     70   
3  16.0          8           304        150    3433          12.0     70   
4  17.0          8           302        140    3449          10.5     70   

   origin                        car  Unnamed: 9  Unnamed: 10  Unnamed: 11  \
0       1  chevrolet chevelle malibu         NaN          NaN          NaN   
1       1          buick skylark 320         NaN          NaN          NaN   
2       1         plymouth satellite         NaN          NaN          NaN   
3       1              amc rebel sst         NaN          NaN          NaN   
4       1                ford torino         NaN          NaN          NaN   

   Unnamed: 12  Unnamed: 13  
0          NaN          NaN  
1          NaN

### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1

In [5]:
cars1 = cars1.loc[:, 'mpg':'car']
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [6]:
print(cars1.shape)
print(cars2.shape)

(198, 9)
(200, 9)


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [7]:
cars = cars1.append(cars2)
cars

  cars = cars1.append(cars2)


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000.

In [10]:
nr_owners = np.random.randint(15000, high=73001, size=398, dtype='l')
nr_owners

array([42204, 29589, 67148, 47347, 72271, 67848, 52401, 44609, 30378,
       41487, 47110, 61290, 30279, 60995, 60552, 70817, 69809, 69044,
       68900, 53871, 35289, 68932, 66271, 53425, 58029, 45242, 58588,
       40723, 47498, 38546, 63585, 31872, 34454, 63884, 20758, 55446,
       41123, 61843, 51598, 40436, 22275, 41572, 69137, 61323, 59129,
       46684, 62981, 60861, 40914, 18447, 70392, 39504, 40473, 68845,
       46693, 49560, 42510, 44785, 27990, 29617, 48913, 22322, 32306,
       71658, 71951, 67603, 35158, 35434, 51995, 66853, 44632, 44239,
       17782, 57326, 54379, 36041, 17166, 53391, 34643, 18045, 47949,
       28619, 72931, 44657, 44938, 44184, 36585, 43667, 58730, 35791,
       55138, 27330, 49002, 22330, 42886, 68336, 57028, 50132, 21820,
       72635, 17415, 18532, 71303, 66874, 19275, 34702, 66615, 68906,
       52661, 48482, 24876, 41260, 46602, 53590, 59125, 59130, 37290,
       50402, 65903, 28128, 49139, 52059, 46893, 45424, 67295, 36531,
       72342, 71359,

### Step 8. Add the column owners to cars

In [11]:
cars['owner'] = nr_owners
cars.tail()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owner
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,43781
196,44.0,4,97,52,2130,24.6,82,2,vw pickup,22712
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage,27432
198,28.0,4,120,79,2625,18.6,82,1,ford ranger,66903
199,31.0,4,119,82,2720,19.4,82,1,chevy s-10,36441
