<div align = "center">
     <h4> Frequency Tables </h4>
     </div>

In [1]:
# Load the Titanic training data from the working directory
titanic_train <- read.csv("train.csv")

# Preprocessing steps:
# as.character() keeps this working whether Cabin was read as a factor or
# as a character column.
char_cabin <- as.character(titanic_train$Cabin)

# Keep only the first character of each cabin value. substr() is vectorised
# and already returns "" for an empty string (and NA for NA), so no ifelse()
# guard is needed for rows without a cabin.
new_Cabin <- substr(char_cabin, 1, 1)

# Convert the shortened cabin variable back to a factor
titanic_train$Cabin <- factor(new_Cabin)

# Check the first few rows of the dataset
head(titanic_train)


Unnamed: 0_level_0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Unnamed: 0_level_1,<int>,<int>,<int>,<chr>,<chr>,<dbl>,<int>,<int>,<chr>,<dbl>,<fct>,<chr>
1,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
2,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C,C
3,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
4,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C,S
5,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
6,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q


**One-Way Tables**

In [2]:
# One-way frequency tables: counts of each distinct value in a single column
table(titanic_train[["Survived"]])   # survival
table(titanic_train[["Pclass"]])     # passenger class
table(titanic_train[["Sex"]])        # sex
table(titanic_train[["Cabin"]])      # cabin letter


  0   1 
549 342 


  1   2   3 
216 184 491 


female   male 
   314    577 


      A   B   C   D   E   F   G   T 
687  15  47  59  33  32  13   4   1 

In [3]:
# Keep the cabin counts in a variable so they can be inspected further
cabin_table <- table(titanic_train[["Cabin"]])

sum(cabin_table)             # Total of all counts (total number of records)
length(cabin_table)          # Number of entries in the table (number of levels)
cabin_table[2]               # Second entry only
cabin_table[seq(3, 8)]       # Entries 3 through 8


 B  C  D  E  F  G 
47 59 33 32 13  4 

In [4]:
# Tabulate ages; exclude = NULL excludes nothing, so missing ages are counted
age_table <- table(titanic_train[["Age"]], exclude = NULL)

# Print the table and confirm that NA appears as its own entry
age_table


0.42 0.67 0.75 0.83 0.92    1    2    3    4    5    6    7    8    9   10   11 
   1    1    2    2    1    7   10    6   10    4    3    3    4    8    2    4 
  12   13   14 14.5   15   16   17   18   19   20 20.5   21   22   23 23.5   24 
   1    2    6    1    5   17   13   26   25   15    1   24   27   15    1   30 
24.5   25   26   27   28 28.5   29   30 30.5   31   32 32.5   33   34 34.5   35 
   1   23   18   18   25    2   20   25    2   17   18    2   15   15    1   18 
  36 36.5   37   38   39   40 40.5   41   42   43   44   45 45.5   46   47   48 
  22    1    6   11   14   13    2    6   13    5    9   12    2    3    9    9 
  49   50   51   52   53   54   55 55.5   56   57   58   59   60   61   62   63 
   6   10    7    6    1    8    2    1    4    2    5    2    4    3    4    2 
  64   65   66   70 70.5   71   74   80 <NA> 
   2    3    1    2    1    2    1    1  177 

In [5]:
# Drop the empty-string level so only rows with a cabin letter are counted
table(titanic_train[["Cabin"]], exclude = "")


 A  B  C  D  E  F  G  T 
15 47 59 33 32 13  4  1 

In [6]:
# Counts per sex
gender_table <- table(titanic_train[["Sex"]])

# Turn the counts into proportions by dividing each one by the overall total
gender_table / sum(gender_table)


  female     male 
0.352413 0.647587 

In [7]:
# prop.table() converts the counts to proportions of the total in one call
prop.table(gender_table)


  female     male 
0.352413 0.647587 

**Two-Way Tables**

In [8]:
# Cross-tabulate survival (rows) against sex (columns)
survived_sex <- table(titanic_train[["Survived"]], titanic_train[["Sex"]])

# Replace the row labels with readable names
dimnames(survived_sex)[[1]] <- c("died", "survived")

survived_sex

          
           female male
  died         81  468
  survived    233  109

In [9]:
rowSums(survived_sex)     # Get row totals (one total per survival outcome)

In [10]:
colSums(survived_sex)     # Get column totals (one total per sex)

In [11]:
survived_sex[2,1]        # Get the value in row 2, column 1 (indexing is [row, column])

In [12]:
# With no margin, each cell is divided by the grand total of the table
prop.table(survived_sex)

          
               female       male
  died     0.09090909 0.52525253
  survived 0.26150393 0.12233446

In [13]:
# margin = 1 divides each cell by its row total (each row sums to 1)
prop.table(survived_sex, margin=1)

          
              female      male
  died     0.1475410 0.8524590
  survived 0.6812865 0.3187135

In [14]:
# margin = 2 divides each cell by its column total (each column sums to 1)
prop.table(survived_sex, margin=2)

          
              female      male
  died     0.2579618 0.8110919
  survived 0.7420382 0.1889081

**Higher Dimensional Tables**

In [15]:
# Three-way table: survival x sex x passenger class
surv_sex_class <- table(
  titanic_train[["Survived"]],
  titanic_train[["Sex"]],
  titanic_train[["Pclass"]]
)

# List the labels currently attached to each dimension
dimnames(surv_sex_class)

In [16]:
# Relabel dimension 1 (survival) and dimension 3 (class) in one assignment
dimnames(surv_sex_class)[c(1, 3)] <- list(
  c("died", "survived"),
  c("class1", "class2", "class3")
)

# Print the relabelled 3-way table
surv_sex_class

, ,  = class1

          
           female male
  died          3   77
  survived     91   45

, ,  = class2

          
           female male
  died          6   91
  survived     70   17

, ,  = class3

          
           female male
  died         72  300
  survived     72   47


In [17]:
# Proportions within each sex/class combination (margins 2 and 3), so the
# died/survived pair sums to 1 for every sex in every class
prop.table(surv_sex_class, margin = c(2, 3))

, ,  = class1

          
               female       male
  died     0.03191489 0.63114754
  survived 0.96808511 0.36885246

, ,  = class2

          
               female       male
  died     0.07894737 0.84259259
  survived 0.92105263 0.15740741

, ,  = class3

          
               female       male
  died     0.50000000 0.86455331
  survived 0.50000000 0.13544669


In [19]:
# Exercise 1: Load the Titanic Disaster data set, then create a one-way
# table of the "Embarked" column.
titanic_train <- read.csv(file = "train.csv")
table(titanic_train[["Embarked"]])


      C   Q   S 
  2 168  77 644 

In [20]:
# Exercise 2: Create a two-way contingency table of the Survived column
# and the Embarked column.
table(titanic_train[["Survived"]], titanic_train[["Embarked"]])

   
          C   Q   S
  0   0  75  47 427
  1   2  93  30 217

In [21]:

# Exercise 3: Use the table from exercise two and prop.table() to check the
# proportion of survival within each level of the Embarked column.
prop.table(
  table(titanic_train[["Survived"]], titanic_train[["Embarked"]]),
  margin = 2
)

   
                      C         Q         S
  0 0.0000000 0.4464286 0.6103896 0.6630435
  1 1.0000000 0.5535714 0.3896104 0.3369565