# Very short introduction to Python, Jupyter and Matplotlib


Go through the lines and read the comments for clarification.

You can edit all the cells and experiment with the scripts.

Press **Ctrl-Enter** to run the content of a cell.

In [None]:
# Import the modules Numpy and Matplotlib and reference them with `np` and `plt` in the script.
# Numpy is a versatile and high-performing library for scientific calculations and multi-dimensional data.
import numpy as np
# Matplotlib is a widely used plotting library whose interface is modelled on MATLAB's plotting commands.
import matplotlib.pyplot as plt

# Press Ctrl-Enter to run this cell.

In [None]:
# You can define functions and variables that can be used in other cells.
def average(numbers: list):
    """Return the arithmetic mean of the values in `numbers`."""
    # Type annotations such as `numbers: list` are optional hints for the reader.
    # Python does not enforce them at runtime, so they do not change how the
    # function behaves.

    # BE AWARE! Python uses indentation to structure the code. Indent with spaces
    # (four per level is the convention) and do not mix them with tabs.
    total = sum(numbers)
    count = len(numbers)
    return total / count

In [None]:
# Call the function defined above.
# Make sure you have run the cell that defines `average` (Ctrl-Enter) first!
average([1, 2, 3, 4])

## Working with Numpy

In [None]:
# Load a CSV file into a Numpy array.
# For this example we use a dataset from the UCI Machine Learning Repository
# (https://archive.ics.uci.edu/ml/datasets/Absenteeism+at+work).
# The values are separated by ";" and `skiprows=1` skips the first line of the file
# (presumably the column header).
# Passing the path lets Numpy open and close the file itself, so no file handle is left open.
data = np.loadtxt("../data/Absenteeism_at_work.csv", delimiter=";", skiprows=1)

In [None]:
# Run this cell to see the string representation of `data`.
# A notebook displays the value of the last expression in a cell automatically.
data

In [None]:
# Here you see the dimensions of the array as a tuple (rows, columns).
# `data` consists of 740 rows and 21 columns.
data.shape

In [None]:
# Numpy arrays come with a very powerful slicing mechanism.
# You can select specific rows and columns with
# [row_from:row_to, col_from:col_to, ... further ranges if there are more dimensions].
# The start index is included, the end index is excluded; a bare `:` selects everything along that axis.
# Here we select all rows of the first three columns.
first_three_columns = data[:, 0:3]
# The 0 here is optional. data[:, :3] would be equally valid. Try it out!

In [None]:
# Have a look at the dimensions of your selection:
# the number of rows is unchanged, but only three columns are left.
first_three_columns.shape

In [None]:
# Here we select the last three rows.
# Negative indices count from the end of the array, so `-3:` means
# "from the third-last row up to the end". The bare `:` keeps all columns.
last_three_rows = data[-3:, :]
last_three_rows.shape

In [None]:
# Here we select a chunk from the middle:
# rows 3 to 5 and columns 11 to 16 (indices start at 0 and the end index is excluded).
subset = data[3:6, 11:17]
subset.shape

In [None]:
# Let's have a look at some specific data.
# Using a single index instead of a range selects one column as a one-dimensional array.
# Columns 17 and 18 are taken to be weight and height here; check the header line of the CSV to confirm.
weight = data[:, 17]
height = data[:, 18]

## Plotting with Matplotlib

In [None]:
# Let's plot the data to see the relationship between weight and height.
# Height goes on the x-axis and weight on the y-axis; `linestyle="none"` with `marker="x"`
# draws only a marker per data point instead of connecting the points with lines.
plt.plot(height, weight, linestyle="none", marker="x")
# Jupyter can display the graphical output of Matplotlib. Quite cool, right?

In [None]:
# What is the mean of the values?
np.mean(height), np.mean(weight)
# You can also surround the above expression with parentheses (...) and the result would be the same.
# The output is a 2-tuple.

In [None]:
# What is their correlation?
# For two one-dimensional inputs `np.corrcoef` returns a 2x2 correlation matrix:
# the diagonal is 1 and the off-diagonal entries are the correlation between height and weight.
np.corrcoef(height, weight)

In [None]:
# Let's fit a straight line (a polynomial of degree 1) through the data.
fit = np.polyfit(height, weight, 1)
# The result is an array with the gradient first and the intercept second.
fit

In [None]:
# Let's plot the data together with the linear regression.
# The fitted line is drawn between the smallest and the largest height only:
# connecting all the (unsorted) data points would retrace the same line many times
# and smear the dashed pattern into a solid stroke.
line_x = np.array([np.min(height), np.max(height)])
# `np.polyval(fit, x)` evaluates fit[0] * x + fit[1] for this degree-1 fit.
plt.plot(height, weight, 'yo', line_x, np.polyval(fit, line_x), '--k')

## TO BE CONTINUED