# This repo contains an introduction to Jupyter and IPython.

Outline of some basics:

    Notebook Basics
    IPython - beyond plain python
    Markdown Cells
    Rich Display System
    Custom Display logic
    Running a Secure Public Notebook Server
    How Jupyter works to run code in different languages.

You can also get this tutorial and run it on your laptop:

git clone https://github.com/ipython/ipython-in-depth

Install IPython and Jupyter:

with conda:

conda install ipython jupyter

with pip:

# first, always upgrade pip!
pip install --upgrade pip
pip install --upgrade ipython jupyter

Start the notebook in the tutorial directory:

cd ipython-in-depth
jupyter notebook

In [1]:
!pip install --upgrade pip
!pip install --upgrade ipython jupyter
!pip install pandas
!pip install numpy
!pip install matplotlib
!pip install sklearn

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

# Apply the ggplot style first, then set a wide default figure size for the plots below.
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (12,5)

# Load the red-wine quality table from the UCI repository; the file is semicolon-separated.
df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv', sep=';')

# Last expression of the cell: shows the first rows as the cell output.
df_wine.head()

ModuleNotFoundError: No module named 'pandas'

In [None]:

def prepocess_wine(df):
    """Replace the numeric ``quality`` score with a binary ``quality_cat`` label.

    Rows whose ``quality`` is greater than 5 get ``quality_cat`` = 1, all
    other rows get 0.  The new column is appended after the existing ones
    and the original ``quality`` column is removed.

    The input frame is left untouched: the result is built with ``assign``
    so the caller's DataFrame does not silently gain a ``quality_cat`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains a ``quality`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``quality_cat`` in place of ``quality``.
    """
    is_good = (df['quality'] > 5).astype(int)
    return df.assign(quality_cat=is_good).drop('quality', axis=1)

# Swap the raw quality score for the binary label, then preview the result.
df_wine = prepocess_wine(df_wine)
df_wine.head()


In [None]:

# The white-wine table serves as the hold-out set; it gets the same preprocessing.
df_wine_test = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv',
    sep=';',
)

df_wine_test = prepocess_wine(df_wine_test)
df_wine_test.head()

In [None]:
# Overlay the alcohol histograms of the two classes on a single set of axes.
_, ax = plt.subplots(1,1)
# Rows with quality_cat == 1 are labelled 'good', rows with quality_cat == 0 'bad';
# alpha=0.3 keeps both distributions visible where they overlap.
df_wine.query('quality_cat == 1').hist('alcohol', alpha=0.3, bins=20, label='good', ax=ax)
df_wine.query('quality_cat == 0').hist('alcohol', alpha=0.3, bins=20, label='bad', ax=ax)
ax.set_xlabel('alcohol')
ax.set_ylabel('count')
# Build the legend from the label= values passed to the two hist calls.
plt.legend()

In [None]:
# Every column except the last is a feature; the last column is the target.
X = df_wine.iloc[:, :-1].values
y = df_wine.iloc[:, -1].values

from sklearn.base import BaseEstimator, ClassifierMixin

class SimpleClassifier(BaseEstimator, ClassifierMixin):
    """Single-feature threshold classifier.

    Predicts class 1 for every row whose last-column value is strictly
    greater than a learned threshold and class 0 otherwise.  ``fit`` picks
    the threshold by a grid search over the observed range of that column.
    """

    def __init__(self):
        """Create the classifier with the threshold initialised to 0."""
        self.threshold = 0

    def fit(self, X, y=None):
        """Learn the threshold that minimises the training error rate.

        By convention the feature being thresholded (alcohol) is the last
        column of ``X``.

        Parameters
        ----------
        X : array-like, 2-D
            Feature matrix; only its last column is used.
        y : array-like, 1-D
            Target labels, one per row of ``X``.  Required: the ``None``
            default is kept only so the signature stays unchanged.

        Returns
        -------
        SimpleClassifier
            The fitted estimator itself.

        Raises
        ------
        ValueError
            If ``y`` is not provided.
        """
        if y is None:
            raise ValueError('SimpleClassifier.fit requires target labels y')

        # Accept DataFrames / lists as well as ndarrays.
        alcohol = np.asarray(X)[:, -1]
        y = np.asarray(y)
        n = float(y.shape[0])

        # Search space: observed range of the feature with a step of 0.1.
        alco_range = np.arange(alcohol.min(), alcohol.max(), 0.1)

        min_error_rate = 1

        # Grid search; the strict '<' keeps the first threshold that reaches
        # the lowest error rate.
        for theta in alco_range:
            prediction = alcohol > theta

            # count_nonzero counts mismatches in native code instead of
            # iterating the array element by element with the builtin sum.
            error_rate = np.count_nonzero(prediction != y) / n

            if error_rate < min_error_rate:
                min_error_rate = error_rate
                self.threshold = theta

        return self

    def predict(self, X, y=None):
        """Predict classes by comparing the last column of ``X`` to the threshold.

        Parameters
        ----------
        X : array-like, 2-D
            Feature matrix; only its last column is used.
        y : ignored
            Not used by the method.

        Returns
        -------
        numpy.ndarray
            Integer array with 1 where the last column exceeds the
            threshold and 0 elsewhere.
        """
        return (np.asarray(X)[:, -1] > self.threshold).astype(int)
    

# Train on the full training matrix; fit() hands back the estimator itself.
model = SimpleClassifier().fit(X, y)

print('Оптимальный порог: %f' % model.threshold)

In [None]:
# Share of training rows whose predicted class differs from the true label.
y_hat = model.predict(X)

error_rate = np.count_nonzero(y_hat != y) / float(y.shape[0])
print('Доля ошибок на обучающей выборке: %f' % error_rate)


In [None]:
# Same feature/target split as for training, applied to the hold-out frame.
X_test = df_wine_test.iloc[:, :-1].values
y_test = df_wine_test.iloc[:, -1].values
y_hat = model.predict(X_test)

# Share of hold-out rows whose predicted class differs from the true label.
error_rate = np.count_nonzero(y_hat != y_test) / float(y_test.shape[0])
print('Доля ошибок на контрольной выборке: %f' % error_rate)