# Appending Data
First, import the necessary packages and load `winequality-red.csv` and `winequality-white.csv`.

In [2]:
# import numpy and pandas
import numpy as np
import pandas as pd
# load red and white wine datasets
red_wine = pd.read_csv('winequality-red.csv', delimiter=';')
white_wine = pd.read_csv('winequality-white.csv', delimiter=';')

## Create Color Columns
Create two arrays as long as the number of rows in the red and white dataframes that repeat the value “red” or “white.” NumPy offers really easy way to do this. Here’s the documentation for [NumPy’s repeat](https://docs.scipy.org/doc/numpy/reference/generated/numpy.repeat.html) function. Take a look and try it yourself.

In [3]:
# create color array for red dataframe
red_color = np.repeat('red', red_wine.shape[0])
# create color array for white dataframe
white_color = np.repeat('white', white_wine.shape[0])

Add arrays to the red and white dataframes. Do this by setting a new column called 'color' to the appropriate array.

In [4]:
red_wine['color'] = red_color
white_wine['color'] = white_color

Do the same for the white dataframe and use `head()` to confirm the change.

In [5]:
print(white_wine.head())

   fixed_acidity  volatile_acidity  citric_acid  residual_sugar  chlorides  \
0            7.0              0.27         0.36            20.7      0.045   
1            6.3              0.30         0.34             1.6      0.049   
2            8.1              0.28         0.40             6.9      0.050   
3            7.2              0.23         0.32             8.5      0.058   
4            7.2              0.23         0.32             8.5      0.058   

   free_sulfur_dioxide  total_sulfur_dioxide  density    pH  sulphates  \
0                 45.0                 170.0   1.0010  3.00       0.45   
1                 14.0                 132.0   0.9940  3.30       0.49   
2                 30.0                  97.0   0.9951  3.26       0.44   
3                 47.0                 186.0   0.9956  3.19       0.40   
4                 47.0                 186.0   0.9956  3.19       0.40   

   alcohol  quality  color  
0      8.8        6  white  
1      9.5        6  white  

## Combine DataFrames with Append
Check the documentation for [Pandas' append](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.append.html) function and see if you can use this to figure out how to combine the dataframes.

In [6]:
# append the dataframes  (There are 3 ways to do this, can you use them all ?)
# Way 1: Using concat
wine = pd.concat([red_wine, white_wine], ignore_index=True)

# Way 2: Using append
wine = red_wine.append(white_wine, ignore_index=True)

# Way 3: Using join
wine = red_wine.join(white_wine, lsuffix='_red', rsuffix='_white')
# view dataframe to check for success
print(wine.head())

   fixed_acidity_red  volatile_acidity_red  citric_acid_red  \
0                7.4                  0.70             0.00   
1                7.8                  0.88             0.00   
2                7.8                  0.76             0.04   
3               11.2                  0.28             0.56   
4                7.4                  0.70             0.00   

   residual_sugar_red  chlorides_red  free_sulfur_dioxide_red  \
0                 1.9          0.076                     11.0   
1                 2.6          0.098                     25.0   
2                 2.3          0.092                     15.0   
3                 1.9          0.075                     17.0   
4                 1.9          0.076                     11.0   

   total_sulfur-dioxide  density_red  pH_red  sulphates_red  ...  \
0                  34.0       0.9978    3.51           0.56  ...   
1                  67.0       0.9968    3.20           0.68  ...   
2                  54.0   

  wine = red_wine.append(white_wine, ignore_index=True)


## Scroll to right, you will find a column filled with NaN values. Go watch the next video and get back here to solve the problem (This is necessary for the next tasks !!)

In [7]:
# fix column names (do not use the usual solution we used earlier in the previous tasks)
wine = wine.rename(columns={'total_sulfur-dioxide': 'total_sulfur_dioxide'})
# merge the two datasets again after fixing the issue (keep the color column)
wine = pd.concat([red_wine, white_wine], ignore_index=True)

In [8]:
# Confirm your changes
print(wine.head())

   fixed_acidity  volatile_acidity  citric_acid  residual_sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free_sulfur_dioxide  total_sulfur-dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality color  total_sulfur_dioxide  
0      9.4        5   red           

## Save Combined Dataset
Save your newly combined dataframe as `winequality_edited.csv`. Remember, set `index=False` to avoid saving with an unnamed column!

In [9]:
# save the dataframe
wine.to_csv('winequality_edited.csv', index=False)

In [10]:
# How many samples are there in the newely saved dataframe?___________6497
print(wine.shape[0])
# How many columns are there?___________14
print(wine.shape[1])

6497
14
