## Import and Analyze Text Files (CSV) with Python

[Text Data with Python](https://www.apmonitor.com/dde/index.php/Main/TextData) in the [Data-Driven Engineering](http://apmonitor.com/dde) online course.

<img align=left width=500px src='https://apmonitor.com/dde/uploads/Main/python_text.png'>

### Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Import Comma Separated Value (CSV) File

<img align=left width=200px src='https://apmonitor.com/dde/uploads/Main/auto_ford_explorer_2021.png'>

In [None]:
# location of the zipped CSV file for the 2021 Ford Explorer data set
file = 'http://apmonitor.com/dde/uploads/Main/Auto_Ford2021.zip'
# skip the first 2 lines of the file; the remaining lines are parsed into a DataFrame
data = pd.read_csv(file,skiprows=2)

### Summarize Data

In [None]:
# summary statistics of the imported data (displayed as the cell output)
data.describe()

### Display First 10 Rows

In [None]:
# show the first 10 rows to inspect the column names and raw values
data.head(10)

### Data Cleansing

In [None]:
# clean up the column names: strip surrounding whitespace
# (the raw headers carry an extra leading space)
raw_names = list(data.columns)
print('Before: ',raw_names[0:3])
c = [name.strip() for name in raw_names]
print('After: ',c[0:3])
# write the cleaned names back to the DataFrame
data.columns = c

In [None]:
# filter data
# a Bearing (deg) of -1 is logged while the sensors are initializing,
# so keep only the rows with a non-negative bearing
bearing_ok = data['Bearing (deg)'] >= 0
data = data[bearing_ok]

In [None]:
# remove data: drop the last 5 rows (positional slice that stops 5 rows before the end)
data = data.iloc[:-5]

### Data Reduction

In [None]:
# downsample: keep every 10th row by position (rows 0, 10, 20, ...)
data = data.iloc[::10]

### Set Time as Index

In [None]:
# use the time column as the row index
# (the column is moved into the index and no longer appears among the columns)
data = data.set_index('Time (sec)')

### Add Column

In [None]:
# new column: trip distance divided by trip fuel, evaluated row by row
data['Avg fuel economy (MPG)'] = data['Trip Distance (miles)']/data['Trip Fuel (gal)']

### Visualize Select Data

In [None]:
# columns to visualize (this list is reused by the pairplot cell below)
c = [
    'Vehicle speed (MPH)',
    'Fuel rate (gal/hr)',
    'Absolute throttle position (%)',
    'Acceleration (ft/s²)',
    'Altitude (ft)',
    'Avg fuel economy (MPG)',
]

# one subplot per selected column, plotted against the DataFrame index
data[c].plot(subplots=True, figsize=(10, 7))
plt.tight_layout()

# save the figure to disk before displaying it
plt.savefig('Auto_data_plot.png', dpi=300)
plt.show()

### View Data Correlation

In [None]:
import seaborn as sns

# pairwise plots of the columns selected in c
selected = data[c]
sns.pairplot(selected)
plt.show()

### Display GPS Route

In [None]:
#pip install plotly

In [None]:
import plotly.express as px

# one marker per row: position from latitude/longitude,
# color from vehicle speed, marker size from fuel rate
fig = px.scatter_mapbox(
    data,
    lat="Latitude (deg)",
    lon="Longitude (deg)",
    color="Vehicle speed (MPH)",
    size="Fuel rate (gal/hr)",
    color_continuous_scale=px.colors.cyclical.IceFire,
    size_max=5,
    zoom=7,
)

# "open-street-map" base map with all figure margins set to zero
fig.update_layout(
    mapbox_style="open-street-map",
    margin=dict(r=0, t=0, l=0, b=0),
)
fig.show()

### Export Modified Text File

In [None]:
# write the cleaned and reduced data, including the added fuel economy column, to a CSV file
data.to_csv('export_data.csv')

### ✅ Activity

A 2021 Chrysler Pacifica is driven in Iowa. Compare the Ford Explorer and Chrysler Pacifica performance.

* Calculate average fuel economy for both vehicles
* Include both vehicles on a pairplot
* Create a map of the Chrysler Pacifica route

<img align=left width=200px src='https://apmonitor.com/dde/uploads/Main/auto_chrysler_pacifica_2021.png'>

### Import Data 

In [None]:
# location of the zipped CSV file for the 2021 Chrysler Pacifica data set
file = 'http://apmonitor.com/dde/uploads/Main/Auto_Chrysler2021.zip'
# skip the first 2 lines of the file, as done for the Ford data
dch = pd.read_csv(file,skiprows=2)

### Data Cleansing and Preparation

In [None]:
# strip leading/trailing whitespace from the column names
raw_names = list(dch.columns)
print('Before: ',raw_names[0:3])
c = [name.strip() for name in raw_names]
print('After: ',c[0:3])
dch.columns = c

# drop the front rows where the trip distance is still zero
# (keep rows with a distance above 1e-5 miles)
moving = dch['Trip Distance (miles)'] > 1e-5
dch = dch[moving]

# shift time so that the first remaining row is at zero
t0 = dch['Time (sec)'].iloc[0]
dch['Time (sec)'] = dch['Time (sec)'] - t0

# filter based on bearing: keep rows with a non-negative Bearing (deg),
# the same filter that is applied to the Ford data
bearing_ok = dch['Bearing (deg)'] >= 0
dch = dch[bearing_ok]

# downsample: keep every 10th row by position
dch = dch.iloc[::10]

# use the shifted time column as the row index
dch = dch.set_index('Time (sec)')

### Average Fuel Economy

In [None]:
# fuel economy column for the Chrysler: trip distance divided by trip fuel
dch['Avg fuel economy (MPG)'] = dch['Trip Distance (miles)'] / dch['Trip Fuel (gal)']

# index values divided by 60 (the x-axis is labeled in minutes)
t1 = np.asarray(data.index) / 60
t2 = np.asarray(dch.index) / 60

# the first three panels share the same layout: (column to plot, y-axis label)
panels = [
    ('Vehicle speed (MPH)', 'Speed (mph)'),
    ('Trip Distance (miles)', 'Distance (mi)'),
    ('Trip Fuel (gal)', 'Trip Fuel (gal)'),
]

plt.figure(figsize=(10, 8))
for row, (column, ylabel) in enumerate(panels, start=1):
    plt.subplot(4, 1, row)
    plt.plot(t1, data[column].values, 'r-', label='Ford')
    plt.plot(t2, dch[column].values, 'b--', label='Chrysler')
    plt.grid()
    plt.ylabel(ylabel)
    plt.legend()

# last panel: fuel economy, with the final value of each vehicle
# (rounded to one decimal) shown in the legend
plt.subplot(4, 1, 4)
fmpg = str(round(data['Avg fuel economy (MPG)'].iloc[-1], 1))
cmpg = str(round(dch['Avg fuel economy (MPG)'].iloc[-1], 1))
plt.plot(t1, data['Avg fuel economy (MPG)'].values, 'r-',
         label='Ford MPG: ' + fmpg)
plt.plot(t2, dch['Avg fuel economy (MPG)'].values, 'b--',
         label='Chrysler MPG: ' + cmpg)
plt.grid()
plt.ylabel('Fuel Economy (MPG)')
plt.xlabel('Time (min)')
plt.legend()
plt.show()

### Pairplot

In [None]:
# columns to compare, plus the label column used to color the points
c = [
    'Vehicle speed (MPH)',
    'Fuel rate (gal/hr)',
    'Absolute throttle position (%)',
    'Acceleration (ft/s²)',
    'Avg fuel economy (MPG)',
    'Vehicle',
]

# tag every row with the vehicle it belongs to
# (this adds a 'Vehicle' column to both DataFrames)
data['Vehicle'] = 'Ford'
dch['Vehicle'] = 'Chrysler'

# take the selected columns of each vehicle and stack them into one
# DataFrame with a fresh 0..n-1 index in place of the time index
c1, c2 = (frame[c].copy() for frame in (data, dch))
c = pd.concat([c1, c2], ignore_index=True)

# pairwise plots, colored by vehicle
sns.pairplot(c, hue='Vehicle')
plt.show()

### Vehicle Route

In [None]:
import plotly.express as px

# Chrysler route: position from latitude/longitude,
# color from vehicle speed, marker size from average fuel economy
fig = px.scatter_mapbox(
    dch,
    lat="Latitude (deg)",
    lon="Longitude (deg)",
    color="Vehicle speed (MPH)",
    size="Avg fuel economy (MPG)",
    color_continuous_scale=px.colors.cyclical.IceFire,
    size_max=5,
    zoom=9,
)

# "open-street-map" base map with all figure margins set to zero
fig.update_layout(
    mapbox_style="open-street-map",
    margin=dict(r=0, t=0, l=0, b=0),
)
fig.show()