# Wine

### Introduction:

This exercise is a adaptation from the UCI Wine dataset.
The only pupose is to practice deleting data with pandas.

### Step 1. Import the necessary libraries

In [1]:
import numpy as np
import pandas as pd

### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data). 

### Step 3. Assign it to a variable called wine

In [2]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
wine = pd.read_csv(url, header=None)

In [3]:
wine.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


### Step 4. Delete the first, fourth, seventh, nineth, eleventh, thirteenth and fourteenth columns

In [4]:
wine = wine.drop(columns=wine.columns[[0,3,6,8,10,12,13]], axis=1)
wine.head()

Unnamed: 0,1,2,4,5,7,9,11
0,14.23,1.71,15.6,127,3.06,2.29,1.04
1,13.2,1.78,11.2,100,2.76,1.28,1.05
2,13.16,2.36,18.6,101,3.24,2.81,1.03
3,14.37,1.95,16.8,113,3.49,2.18,0.86
4,13.24,2.59,21.0,118,2.69,1.82,1.04


### Step 5. Assign the columns as below:

The attributes are (donated by Riccardo Leardi, riclea '@' anchem.unige.it):  
1) alcohol  
2) malic_acid  
3) alcalinity_of_ash  
4) magnesium  
5) flavanoids  
6) proanthocyanins  
7) hue 

In [5]:
wine.columns = ['alcohol', 'malic_acid', 'alcalinity_of_ash', 'magnesium', 'flavanoids', 'proanthocyanins', 'hue']
wine.head()

Unnamed: 0,alcohol,malic_acid,alcalinity_of_ash,magnesium,flavanoids,proanthocyanins,hue
0,14.23,1.71,15.6,127,3.06,2.29,1.04
1,13.2,1.78,11.2,100,2.76,1.28,1.05
2,13.16,2.36,18.6,101,3.24,2.81,1.03
3,14.37,1.95,16.8,113,3.49,2.18,0.86
4,13.24,2.59,21.0,118,2.69,1.82,1.04


### Step 6. Set the values of the first 3 rows from alcohol as NaN

In [6]:
wine.iloc[0:3, 0] = np.nan
wine.head()

Unnamed: 0,alcohol,malic_acid,alcalinity_of_ash,magnesium,flavanoids,proanthocyanins,hue
0,,1.71,15.6,127,3.06,2.29,1.04
1,,1.78,11.2,100,2.76,1.28,1.05
2,,2.36,18.6,101,3.24,2.81,1.03
3,14.37,1.95,16.8,113,3.49,2.18,0.86
4,13.24,2.59,21.0,118,2.69,1.82,1.04


### Step 7. Now set the value of the rows 3 and 4 of magnesium as NaN

In [7]:
wine.iloc[2:4, 3] = np.nan
wine.head()

Unnamed: 0,alcohol,malic_acid,alcalinity_of_ash,magnesium,flavanoids,proanthocyanins,hue
0,,1.71,15.6,127.0,3.06,2.29,1.04
1,,1.78,11.2,100.0,2.76,1.28,1.05
2,,2.36,18.6,,3.24,2.81,1.03
3,14.37,1.95,16.8,,3.49,2.18,0.86
4,13.24,2.59,21.0,118.0,2.69,1.82,1.04


### Step 8. Fill the value of NaN with the number 10 in alcohol and 100 in magnesium

In [8]:
wine.alcohol.fillna(10, inplace=True)
wine.magnesium.fillna(100, inplace=True)

### Step 9. Count the number of missing values

In [9]:
wine.isnull().sum()

alcohol              0
malic_acid           0
alcalinity_of_ash    0
magnesium            0
flavanoids           0
proanthocyanins      0
hue                  0
dtype: int64

### Step 10.  Create an array of 10 random numbers up until 10

In [10]:
rand_arr = np.random.randint(10, size=10)

### Step 11.  Use random numbers you generated as an index and assign NaN value to each of cell.

In [11]:
wine.alcohol[rand_arr] = np.nan
wine.head(10)

Unnamed: 0,alcohol,malic_acid,alcalinity_of_ash,magnesium,flavanoids,proanthocyanins,hue
0,,1.71,15.6,127.0,3.06,2.29,1.04
1,,1.78,11.2,100.0,2.76,1.28,1.05
2,,2.36,18.6,100.0,3.24,2.81,1.03
3,,1.95,16.8,100.0,3.49,2.18,0.86
4,,2.59,21.0,118.0,2.69,1.82,1.04
5,,1.76,15.2,112.0,3.39,1.97,1.05
6,14.39,1.87,14.6,96.0,2.52,1.98,1.02
7,14.06,2.15,17.6,121.0,2.51,1.25,1.06
8,14.83,1.64,14.0,97.0,2.98,1.98,1.08
9,13.86,1.35,16.0,98.0,3.15,1.85,1.01


### Step 12.  How many missing values do we have?

In [12]:
wine.isnull().sum()

alcohol              6
malic_acid           0
alcalinity_of_ash    0
magnesium            0
flavanoids           0
proanthocyanins      0
hue                  0
dtype: int64

### Step 13. Delete the rows that contain missing values

In [13]:
wine = wine.dropna(axis=0)
wine.head()

Unnamed: 0,alcohol,malic_acid,alcalinity_of_ash,magnesium,flavanoids,proanthocyanins,hue
6,14.39,1.87,14.6,96.0,2.52,1.98,1.02
7,14.06,2.15,17.6,121.0,2.51,1.25,1.06
8,14.83,1.64,14.0,97.0,2.98,1.98,1.08
9,13.86,1.35,16.0,98.0,3.15,1.85,1.01
10,14.1,2.16,18.0,105.0,3.32,2.38,1.25


### Step 14. Print only the non-null values in alcohol

In [14]:
nonnull_alcohol = wine.alcohol.notnull()
wine.alcohol[nonnull_alcohol]

6      14.39
7      14.06
8      14.83
9      13.86
10     14.10
11     14.12
12     13.75
13     14.75
14     14.38
15     13.63
16     14.30
17     13.83
18     14.19
19     13.64
20     14.06
21     12.93
22     13.71
23     12.85
24     13.50
25     13.05
26     13.39
27     13.30
28     13.87
29     14.02
30     13.73
31     13.58
32     13.68
33     13.76
34     13.51
35     13.48
       ...  
148    13.32
149    13.08
150    13.50
151    12.79
152    13.11
153    13.23
154    12.58
155    13.17
156    13.84
157    12.45
158    14.34
159    13.48
160    12.36
161    13.69
162    12.85
163    12.96
164    13.78
165    13.73
166    13.45
167    12.82
168    13.58
169    13.40
170    12.20
171    12.77
172    14.16
173    13.71
174    13.40
175    13.27
176    13.17
177    14.13
Name: alcohol, Length: 172, dtype: float64

### Step 15.  Reset the index, so it starts with 0 again

In [15]:
wine = wine.reset_index(drop=True)
wine.head()

Unnamed: 0,alcohol,malic_acid,alcalinity_of_ash,magnesium,flavanoids,proanthocyanins,hue
0,14.39,1.87,14.6,96.0,2.52,1.98,1.02
1,14.06,2.15,17.6,121.0,2.51,1.25,1.06
2,14.83,1.64,14.0,97.0,2.98,1.98,1.08
3,13.86,1.35,16.0,98.0,3.15,1.85,1.01
4,14.1,2.16,18.0,105.0,3.32,2.38,1.25
