# MPG Cars

### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [30]:
import numpy as np
import pandas as pd

### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

   ### Step 3. Assign each to a variable called cars1 and cars2

In [13]:
cars1 = pd.read_csv("https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv")
cars2 = pd.read_csv("https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv")

print(cars1.head())
print(cars2.head())

    mpg  cylinders  displacement horsepower  weight  acceleration  model  \
0  18.0          8           307        130    3504          12.0     70   
1  15.0          8           350        165    3693          11.5     70   
2  18.0          8           318        150    3436          11.0     70   
3  16.0          8           304        150    3433          12.0     70   
4  17.0          8           302        140    3449          10.5     70   

   origin                        car  Unnamed: 9  Unnamed: 10  Unnamed: 11  \
0       1  chevrolet chevelle malibu         NaN          NaN          NaN   
1       1          buick skylark 320         NaN          NaN          NaN   
2       1         plymouth satellite         NaN          NaN          NaN   
3       1              amc rebel sst         NaN          NaN          NaN   
4       1                ford torino         NaN          NaN          NaN   

   Unnamed: 12  Unnamed: 13  
0          NaN          NaN  
1          NaN

### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1

In [18]:
cars1 = cars1.loc[:, "mpg":"car"]
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [23]:
print(cars1.shape)
print(cars2.shape)

(198, 9)
(200, 9)


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [24]:
cars = cars1.append(cars2)
cars

  cars = cars1.append(cars2)


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000.

In [35]:
nr_owners = np.random.randint(15000, high=73001, size=398, dtype='l')
nr_owners

array([47223, 64451, 68202, 19635, 54522, 30230, 55311, 50239, 36060,
       64294, 52343, 46519, 15823, 32637, 48365, 40410, 68586, 27536,
       18545, 50399, 35969, 59530, 44560, 29356, 17116, 59071, 19474,
       69334, 33796, 22612, 37692, 67134, 58948, 50478, 63437, 28616,
       42071, 48260, 59225, 60339, 40977, 28862, 58968, 37034, 71016,
       25131, 16590, 51324, 64336, 64848, 37230, 31968, 21440, 68376,
       63388, 24554, 68969, 21576, 41540, 19126, 41905, 16177, 19718,
       37325, 67898, 69756, 50646, 49750, 20920, 40979, 29269, 72626,
       49248, 31789, 58034, 58281, 65573, 50215, 64871, 46763, 63331,
       32361, 30152, 29654, 38626, 36508, 47969, 29894, 25804, 29674,
       44596, 68628, 22625, 58010, 54265, 39982, 37967, 72667, 62933,
       33170, 23011, 48297, 27284, 40107, 21284, 58178, 25759, 42166,
       15627, 24657, 36006, 53531, 15624, 62384, 59247, 62223, 41156,
       45829, 64803, 72678, 53809, 17303, 65743, 42844, 53023, 66086,
       19726, 72846,

### Step 8. Add the column owners to cars

In [37]:
cars["owners"] = nr_owners
cars.tail()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,25048
196,44.0,4,97,52,2130,24.6,82,2,vw pickup,41891
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage,33758
198,28.0,4,120,79,2625,18.6,82,1,ford ranger,32622
199,31.0,4,119,82,2720,19.4,82,1,chevy s-10,29713
