# MPG Cars

### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [1]:
import numpy as np
import pandas as pd 

### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

   ### Step 3. Assign each to a variable called cars1 and cars2

In [2]:
url1 = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv"

cars1 = pd.read_csv(url1 , sep = ",")

cars1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 198 entries, 0 to 197
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           198 non-null    float64
 1   cylinders     198 non-null    int64  
 2   displacement  198 non-null    int64  
 3   horsepower    198 non-null    object 
 4   weight        198 non-null    int64  
 5   acceleration  198 non-null    float64
 6   model         198 non-null    int64  
 7   origin        198 non-null    int64  
 8   car           198 non-null    object 
 9   Unnamed: 9    0 non-null      float64
 10  Unnamed: 10   0 non-null      float64
 11  Unnamed: 11   0 non-null      float64
 12  Unnamed: 12   0 non-null      float64
 13  Unnamed: 13   0 non-null      float64
dtypes: float64(7), int64(5), object(2)
memory usage: 21.8+ KB


In [3]:
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,,,,,
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,,,,,
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,,,,,
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,,,,,
4,17.0,8,302,140,3449,10.5,70,1,ford torino,,,,,


In [4]:
url2 = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv"

cars2 = pd.read_csv(url2 , sep = ",")

cars2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           200 non-null    float64
 1   cylinders     200 non-null    int64  
 2   displacement  200 non-null    int64  
 3   horsepower    200 non-null    object 
 4   weight        200 non-null    int64  
 5   acceleration  200 non-null    float64
 6   model         200 non-null    int64  
 7   origin        200 non-null    int64  
 8   car           200 non-null    object 
dtypes: float64(2), int64(5), object(2)
memory usage: 14.2+ KB


In [5]:
cars2.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,33.0,4,91,53,1795,17.4,76,3,honda civic
1,20.0,6,225,100,3651,17.7,76,1,dodge aspen se
2,18.0,6,250,78,3574,21.0,76,1,ford granada ghia
3,18.5,6,250,110,3645,16.2,76,1,pontiac ventura sj
4,17.5,6,258,95,3193,17.8,76,1,amc pacer d/l


### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1

In [6]:
cars1 = cars1.loc[:, "mpg":"car"]

#cars1 = cars1.dropna(axis = 1)
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [7]:
print(cars1.shape)
print(cars2.shape)

(198, 9)
(200, 9)


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [8]:
#cars = cars1.append(cars2)
#or
cars = pd.concat([cars1,cars2],axis=0)
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000.

In [9]:
owners = np.random.randint(15000,73000,size=(len(cars),1))
owners

array([[59574],
       [68791],
       [63512],
       [21650],
       [69353],
       [67549],
       [34856],
       [17469],
       [16995],
       [62714],
       [54285],
       [71583],
       [30541],
       [37224],
       [52278],
       [21920],
       [53327],
       [21746],
       [19372],
       [36458],
       [35844],
       [30954],
       [16797],
       [43965],
       [52143],
       [44316],
       [34181],
       [68651],
       [36259],
       [36005],
       [47612],
       [51321],
       [16637],
       [60145],
       [58576],
       [36500],
       [40441],
       [37222],
       [28580],
       [24188],
       [61508],
       [26997],
       [27750],
       [53914],
       [25262],
       [58128],
       [28842],
       [46379],
       [30861],
       [38872],
       [56999],
       [53368],
       [34423],
       [29862],
       [38957],
       [30856],
       [43062],
       [55877],
       [39012],
       [25449],
       [28332],
       [60159],
       [

### Step 8. Add the column owners to cars

In [11]:
cars['owners'] = owners

cars.tail()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,52900
196,44.0,4,97,52,2130,24.6,82,2,vw pickup,28589
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage,32886
198,28.0,4,120,79,2625,18.6,82,1,ford ranger,38741
199,31.0,4,119,82,2720,19.4,82,1,chevy s-10,39223
