In [6]:
from typing import Tuple
import numpy as np
import pandas as pd
import random

In [34]:
def split_train_test(X: pd.DataFrame, y: pd.Series, train_proportion: float = .75) \
        -> Tuple[pd.DataFrame, pd.Series, pd.DataFrame, pd.Series]:
    """
    Randomly split given sample to a training- and testing sample

    Rows are shuffled with ``np.random.permutation``, so the split is
    reproducible after ``np.random.seed(...)``. The inputs are not modified;
    the returned objects keep the original index labels of the selected rows.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_features)
        Data frame of samples and feature values.

    y : Series of shape (n_samples, )
        Responses corresponding samples in data frame.

    train_proportion: Fraction of samples to be split as training set.
        Must lie in [0, 1].

    Returns
    -------
    train_X : DataFrame of shape (ceil(train_proportion * n_samples), n_features)
        Design matrix of train set

    train_y : Series of shape (ceil(train_proportion * n_samples), )
        Responses of training samples

    test_X : DataFrame of shape (floor((1-train_proportion) * n_samples), n_features)
        Design matrix of test set

    test_y : Series of shape (floor((1-train_proportion) * n_samples), )
        Responses of test samples

    Raises
    ------
    ValueError
        If ``X`` and ``y`` have a different number of samples, or if
        ``train_proportion`` is outside [0, 1].

    """
    n_samples = X.shape[0]
    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same number of samples, got {n_samples} and {len(y)}")
    if not 0 <= train_proportion <= 1:
        raise ValueError(
            f"train_proportion must be in [0, 1], got {train_proportion}")

    # A single shared permutation keeps every row of X paired with its response.
    shuffled_rows = np.random.permutation(n_samples)

    # True ceiling, as documented: `int(...) + 1` overshoots whenever the product
    # is already an integer. Rounding first absorbs floating-point noise so a
    # product such as 7.000000000000001 is not pushed up to 8.
    n_train = int(np.ceil(np.round(train_proportion * n_samples, 10)))

    train_rows, test_rows = shuffled_rows[:n_train], shuffled_rows[n_train:]

    # .iloc selects rows by position; plain X[...] on a DataFrame would look up
    # column labels, and y[...] on a Series would look up index labels.
    return X.iloc[train_rows], y.iloc[train_rows], X.iloc[test_rows], y.iloc[test_rows]

In [29]:
# Toy data set: 10 samples x 10 features, where row i holds 10*i .. 10*i + 9.
X = np.arange(100).reshape((10,10))
n = X.shape[0]
# One response per sample (y[i] == i), so each response identifies its row.
# The previous np.arange(100) created 100 responses for only 10 samples.
y = np.arange(n)
train_proportion = 0.75
# Shuffle X and y with the same permutation so rows and responses stay aligned.
permut = np.random.permutation(n)
X = X[permut]
y = y[permut]
# int() truncates, so this is floor(n * train_proportion): 7 of the 10 rows here.
train_end_index = int(X.shape[0] * train_proportion)

In [33]:
# Training part of the design matrix: the first train_end_index rows of the shuffled X.
X[0:train_end_index]

array([[50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79]])

In [31]:
# Training responses: the first train_end_index entries of the shuffled y.
y[:train_end_index]

array([5, 0, 4, 8, 3, 2, 7])