***Q:** Why set a value for `random_state`?*
> *Setting it ensures that a random process produces the same results every time it runs, which makes your code reproducible (by you and others!)*

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
import pandas as pd

# Toy feature table: six rows, three columns, default 0..5 integer index.
X = pd.DataFrame(
    data={
        "Fare": [7.25, 71.2833, 7.925, 53.1, 8.05, 8.4583],
        "Embarked": ["S", "C", "S", "S", "S", "Q"],
        "Sex": ["male", "female", "female", "female", "male", "male"],
    },
    columns=["Fare", "Embarked", "Sex"],  # spell out the column order explicitly
)

# Bare last expression so the notebook renders the frame.
X

Unnamed: 0,Fare,Embarked,Sex
0,7.25,S,male
1,71.2833,C,female
2,7.925,S,female
3,53.1,S,female
4,8.05,S,male
5,8.4583,Q,male


In [9]:
# random_state seeds the shuffling done before the split. Any non-negative
# integer is accepted (0 included); the particular value carries no meaning,
# it only has to stay the same for the split to be repeatable.
# test_size=0.5 holds out half of the rows, so X_train gets the other half.
X_train, X_test = train_test_split(X, test_size=0.5, random_state=1)

X_train

Unnamed: 0,Fare,Embarked,Sex
0,7.25,S,male
3,53.1,S,female
5,8.4583,Q,male


In [10]:
# Same data, same test_size, same random_state=1:
# the shuffle is seeded identically, so the split comes out the same every time.
X_train, X_test = train_test_split(X, test_size=0.5, random_state=1)

X_train

Unnamed: 0,Fare,Embarked,Sex
0,7.25,S,male
3,53.1,S,female
5,8.4583,Q,male


> *The output is identical to the previous `X_train`, because the same `random_state` was used.*

In [11]:
# Switching to a different seed (42 instead of 1) reshuffles the rows
# differently, so the rows that land in X_train will generally change.
X_train, X_test = train_test_split(X, test_size=0.5, random_state=42)

X_train

Unnamed: 0,Fare,Embarked,Sex
2,7.925,S,female
4,8.05,S,male
3,53.1,S,female


In [12]:
# Repeating the call with random_state=42 reproduces the split obtained
# from any other call that uses random_state=42 on the same data.
X_train, X_test = train_test_split(X, test_size=0.5, random_state=42)

X_train

Unnamed: 0,Fare,Embarked,Sex
2,7.925,S,female
4,8.05,S,male
3,53.1,S,female


: 