# Appending Data
First, import the necessary packages and load `winequality-red.csv` and `winequality-white.csv`.

In [2]:
# import numpy and pandas
import pandas as pd
import numpy as np

# load red and white wine datasets
red_df = pd.read_csv('winequality-red.csv')
white_df = pd.read_csv('winequality-white.csv')

## Create Color Columns
Create two arrays as long as the number of rows in the red and white dataframes that repeat the value “red” or “white.” NumPy offers really easy way to do this. Here’s the documentation for [NumPy’s repeat](https://docs.scipy.org/doc/numpy/reference/generated/numpy.repeat.html) function. Take a look and try it yourself.

In [3]:
# create color array for red dataframe
color_red = np.repeat('red', red_df.shape[0])

# create color array for white dataframe
color_white = np.repeat('white', white_df.shape[0])


Add arrays to the red and white dataframes. Do this by setting a new column called 'color' to the appropriate array. The cell below does this for the red dataframe.

In [4]:
red_df['color'] = color_red
red_df.head()

Unnamed: 0,fixed_acidity;volatile_acidity;citric_acid;residual_sugar;chlorides;free_sulfur_dioxide;total_sulfur-dioxide;density;pH;sulphates;alcohol;quality,color
0,7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5,red
1,7.8;0.88;0;2.6;0.098;25;67;0.9968;3.2;0.68;9.8;5,red
2,7.8;0.76;0.04;2.3;0.092;15;54;0.997;3.26;0.65;...,red
3,11.2;0.28;0.56;1.9;0.075;17;60;0.998;3.16;0.58...,red
4,7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5,red


Do the same for the white dataframe and use `head()` to confirm the change.

In [5]:
white_df['color'] = color_white
white_df.head()

Unnamed: 0,fixed_acidity;volatile_acidity;citric_acid;residual_sugar;chlorides;free_sulfur_dioxide;total_sulfur_dioxide;density;pH;sulphates;alcohol;quality,color
0,7;0.27;0.36;20.7;0.045;45;170;1.001;3;0.45;8.8;6,white
1,6.3;0.3;0.34;1.6;0.049;14;132;0.994;3.3;0.49;9...,white
2,8.1;0.28;0.4;6.9;0.05;30;97;0.9951;3.26;0.44;1...,white
3,7.2;0.23;0.32;8.5;0.058;47;186;0.9956;3.19;0.4...,white
4,7.2;0.23;0.32;8.5;0.058;47;186;0.9956;3.19;0.4...,white


## Combine DataFrames with Append
Check the documentation for [Pandas' append](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.append.html) function and see if you can use this to figure out how to combine the dataframes. (Bonus: Why aren't we using the [merge](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.merge.html) method to combine the dataframes?) If you don’t get it, I’ll show you how afterwards. Make sure to save your work in this notebook! You'll come back to this later.

In [6]:
# append dataframes
wine_df = red_df.append(white_df)

# view dataframe to check for success
wine_df.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,color,fixed_acidity;volatile_acidity;citric_acid;residual_sugar;chlorides;free_sulfur_dioxide;total_sulfur-dioxide;density;pH;sulphates;alcohol;quality,fixed_acidity;volatile_acidity;citric_acid;residual_sugar;chlorides;free_sulfur_dioxide;total_sulfur_dioxide;density;pH;sulphates;alcohol;quality
0,red,7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5,
1,red,7.8;0.88;0;2.6;0.098;25;67;0.9968;3.2;0.68;9.8;5,
2,red,7.8;0.76;0.04;2.3;0.092;15;54;0.997;3.26;0.65;...,
3,red,11.2;0.28;0.56;1.9;0.075;17;60;0.998;3.16;0.58...,
4,red,7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5,


## Save Combined Dataset
Save your newly combined dataframe as `winequality_edited.csv`. Remember, set `index=False` to avoid saving with an unnamed column!

In [7]:
wine_df.to_csv('winequality_edited.csv', index = False)

In [8]:
import pandas as pd
wine_df = pd.read_csv('winequality_edited.csv', sep = ';')
wine_df.head()

Unnamed: 0,"color,fixed_acidity",volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur-dioxide,density,pH,sulphates,...,citric_acid.1,residual_sugar.1,chlorides.1,free_sulfur_dioxide.1,total_sulfur_dioxide,density.1,pH.1,sulphates.1,alcohol.1,quality
0,"red,7.4",0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,...,,,,,,,,,,
1,"red,7.8",0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,...,,,,,,,,,,
2,"red,7.8",0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,...,,,,,,,,,,
3,"red,11.2",0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,...,,,,,,,,,,
4,"red,7.4",0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,...,,,,,,,,,,


In [9]:
for i, l in enumerate(wine_df.columns):
    print(i, l)

0 color,fixed_acidity
1 volatile_acidity
2 citric_acid
3 residual_sugar
4 chlorides
5 free_sulfur_dioxide
6 total_sulfur-dioxide
7 density
8 pH
9 sulphates
10 alcohol
11 quality,fixed_acidity
12 volatile_acidity.1
13 citric_acid.1
14 residual_sugar.1
15 chlorides.1
16 free_sulfur_dioxide.1
17 total_sulfur_dioxide
18 density.1
19 pH.1
20 sulphates.1
21 alcohol.1
22 quality


In [10]:
wine_df.rename(columns = {'quality,fixed_acidity': 'fixed_acidity.1'}, inplace = True)

In [11]:
wine_df.head()

Unnamed: 0,"color,fixed_acidity",volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur-dioxide,density,pH,sulphates,...,citric_acid.1,residual_sugar.1,chlorides.1,free_sulfur_dioxide.1,total_sulfur_dioxide,density.1,pH.1,sulphates.1,alcohol.1,quality
0,"red,7.4",0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,...,,,,,,,,,,
1,"red,7.8",0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,...,,,,,,,,,,
2,"red,7.8",0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,...,,,,,,,,,,
3,"red,11.2",0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,...,,,,,,,,,,
4,"red,7.4",0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,...,,,,,,,,,,
