# Pandas
Read "10 minutes to Pandas": https://pandas.pydata.org/docs/user_guide/10min.html before solving the exercises.
We will use the data set "cars_data" in the exercises below. 

In [None]:
# Importing Pandas. 
import pandas as pd

### Explain what a CSV file is.

In [None]:
# Short description of the CSV format, assembled line by line and printed once.

csv_explanation = (
    "A CSV (Comma-Separated Values) file is a plain text file format. "
    "It is used to store and exchange tabular data, \n"
    "such as spreadsheets or databases, in a simple way. \n"
    "In a CSV file, each line represents a row of data, "
    "and each field (or column) within a row is separated by a comma."
)
print(csv_explanation)

### Load the data set "cars_data" through Pandas. 

In [None]:
# When reading in the data, you either have the data file in the same folder as your Python script
# or in a separate folder.

# The code below can be run if you have the data file in the same folder as the script
# cars = pd.read_csv("cars_data.csv")

# The code below can be run if you have the data file in another folder.
# Notice, you must change the path according to where you have the data on your computer.
# pd.read_csv(r'C:\Users\Antonio Prgomet\Documents\ec_utbildning\kursframstallning\ds23\python_stat\exercises\numpy_matplot_pandas\cars_data.csv')

In [None]:
# Loading the data set "cars_data" through Pandas and creating a DataFrame (df).
# The path is written as a raw string (r"") so the backslashes (\) in the
# Windows path are taken literally instead of being read as escape sequences.
# Notice, you must change the path according to where the file is on your computer.

file_path = r"C:\USB\cars_data.csv"
df = pd.read_csv(file_path)

# Printing the DataFrame (df)
print(df.to_string())

### Print the first 10 rows of the data. 

In [None]:
# Show the first 10 rows with head(), passing the row count by keyword
print(df.head(n=10).to_string())

In [None]:
# Show the first 10 rows with a row slice taken directly on the DataFrame
print(df[:10].to_string())

In [None]:
# Show the first 10 rows with the position-based iloc indexer (positions 0 up to, not including, 10)
print(df.iloc[:10].to_string())

### Print the last 5 rows. 

In [None]:
# Printing the last 5 rows by using tail()
# (the exercise asks for 5 rows, so the count is passed explicitly)
print(df.tail(5).to_string())

In [None]:
# Printing the last 5 rows by slicing through the DataFrame
# (a negative start counts from the end, so -5: keeps the final 5 rows)
print(df[-5:].to_string())

In [None]:
# Printing the last 5 rows with the position-based iloc indexer,
# using the index range (-5:) to take the final 5 positions
print(df.iloc[-5:].to_string())

### By using the info method, check how many non-null values each column has. 

In [None]:
# Using the info method to check how many non-null values each column has.
# info() prints its report itself and returns None, so it is called directly;
# wrapping it in print() would add a stray "None" line to the output.
df.info()

### If any column has a missing value, drop the entire row. Note that the operation should be in place, meaning you change the DataFrame itself.

In [None]:
# Dropping the entire row if the row has a missing value (in-place operation,
# so df itself is modified and nothing is assigned back)
df.dropna(inplace=True)

# Checking the result: info() prints its report itself and returns None,
# so it is called directly instead of being wrapped in print()
df.info()

### Calculate the mean of each numeric column. 

In [None]:
# Column-wise mean, restricted to the numeric columns via numeric_only=True
m = df.mean(axis=0, numeric_only=True)
print(m.to_string())

### Select the rows where the column "company" is equal to 'honda'. 

In [None]:
# Keep only the rows whose "company" value equals 'honda', using a boolean mask with loc
s = df.loc[df["company"].eq("honda")]
print(s.to_string())

### Sort the data set by price in descending order. This should *not* be an inplace operation. 

In [None]:
# Order the rows from highest to lowest price.
# The result is not assigned back and inplace is not set, so df keeps its order.
df.sort_values("price", ascending=False)

### Select the rows where the column "company" is equal to any of the values (audi, bmw, porsche).

In [None]:
# Select the rows by combining one comparison per company with the or operator (|)
df.loc[
    (df["company"] == "audi")
    | (df["company"] == "bmw")
    | (df["company"] == "porsche")
]

In [None]:
# Select the rows with a single isin() membership test against the wanted companies
df.loc[df["company"].isin(["audi", "bmw", "porsche"])]

### Find the number of cars (rows) for each company. 

In [None]:
# Count how many rows (cars) each company has
print(df.loc[:, "company"].value_counts())

### Find the maximum price for each company. 

In [None]:
# Group the rows by company and take the highest price within each group
df.groupby("company")["price"].agg("max")