## 1. Data frame

* A data structure in R
* Same as good old table with rows and columns
* Each column has a header with a name

### Reading data

In [1]:
# Set the working directory. "." is the current directory, so this call
# leaves the working directory where it already is.
setwd(".")

In [2]:
# Read the same data used in exercise 1.
# stringsAsFactors = TRUE is spelled out because read.csv() stopped creating
# factors by default in R 4.0, and later cells call levels() on the text
# columns (gender, website).
data <- read.csv("data.csv", stringsAsFactors = TRUE)

In [3]:
# Preview the first 20 rows of the data
head(data, n = 20)

Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,gender,finnish,website
2,1,5,1,2,2,1,5,3,4,male,no,BBC
4,2,4,5,1,5,1,5,5,5,male,yes,BBC
1,5,3,1,2,4,3,2,3,3,male,no,CNN
2,1,2,2,1,3,2,2,1,4,female,no,BBC
3,1,1,5,3,3,3,2,1,4,female,no,CNN
2,5,2,3,2,5,5,4,3,1,female,yes,BBC
1,5,1,5,3,2,4,2,5,3,male,no,BBC
2,1,5,4,1,2,2,3,5,3,male,yes,CNN
2,5,4,2,2,1,4,4,3,3,male,no,CNN
1,5,1,5,3,2,4,2,5,3,male,no,CNN


### Subsetting data

#### Need some help? Google it, or type ?name_of_function

In [4]:
# Open the help page for subset()
?subset

In [5]:
# Keep the ten question columns for the rows where finnish == "no",
# selecting the columns through a character vector of their names.
numeric_columns <- paste0("Q", 1:10)
int_students <- subset(
    data,
    subset = finnish == "no",
    select = numeric_columns
)

#### or

In [6]:
# Same selection, addressing the columns by position
int_students <- subset(data, subset = finnish == "no", select = 1:10)

#### or 

In [7]:
# Same selection, using a range of unquoted column names
# (Q1 to Q10 are the first ten, adjacent columns of the data)
int_students <- subset(data, subset = finnish == "no", select = Q1:Q10)

#### yet another way

In [8]:
# Same selection with bracket indexing: logical row mask, column positions
int_students <- data[data[["finnish"]] == "no", seq_len(10)]

In [9]:
# Display the selected rows and columns
int_students

Unnamed: 0,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10
1,2,1,5,1,2,2,1,5,3,4
3,1,5,3,1,2,4,3,2,3,3
4,2,1,2,2,1,3,2,2,1,4
5,3,1,1,5,3,3,3,2,1,4
7,1,5,1,5,3,2,4,2,5,3
9,2,5,4,2,2,1,4,4,3,3
10,1,5,1,5,3,2,4,2,5,3
11,3,2,3,5,1,5,4,5,5,5
12,1,3,2,5,4,1,4,1,5,2
13,1,1,4,5,5,4,2,5,2,1


### Some statistics

In [10]:
# Correlation between the Q1 and Q2 responses of the selected students
correlation_between_Q1_Q2 <- with(int_students, cor(Q1, Q2))
correlation_between_Q1_Q2

In [11]:
# Mean of the Q6 responses of the selected students
# (the column now matches the variable name).
mean_Q6 <- mean(int_students$Q6)
mean_Q6

### Merging data frames 

In [12]:
# NOTE(review): this cell looks unfinished. `conditio` is not defined in any
# cell visible here, so evaluating it raises the "object 'conditio' not found"
# error shown below. TODO: replace with the intended merging example.
conditio

ERROR: Error in eval(expr, envir, enclos): object 'conditio' not found


# 2. Functions

Functions are the building blocks of R 

A function is written as follows: 

      myfunction <- function(arg1, arg2, ... ){
            statements
            return(object)
      } 

A function in R is called as follows:

**myfunction(args)**

Let's write some functions:
* Standard Deviation 
* Frequency Table
* Function using factoring
* Function to merge two data frames

### Standard deviation  

In [None]:
# Sample standard deviation (n - 1 denominator) of a numeric vector.
#
# Args:
#   sample: numeric vector of observations.
#
# Returns:
#   The square root of the summed squared deviations from the mean divided by
#   length(sample) - 1. With fewer than two observations the result is NA,
#   which is also what the built-in stats::sd() returns in that case.
standard_deviation <- function(sample) {
    n <- length(sample)
    # With n < 2 the denominator would be 0 or -1, so there is no meaningful
    # estimate to return.
    if (n < 2) {
        return(NA_real_)
    }
    # Vectorised: subtract the mean from every element at once.
    deviations <- sample - mean(sample)
    sqrt(sum(deviations^2) / (n - 1))
}

#### Let's test the function using the entire data set

In [None]:
# Standard deviation of the Q1 responses, computed with our own function.
# Note: the result is stored in a variable called `sd`, the same name as R's
# built-in function; a call such as sd(x) still reaches the function.
sd <- standard_deviation(data[["Q1"]])
sd

#### Let's validate our function against R's built-in standard deviation function

In [None]:
# Reference value from the built-in standard deviation function
stats::sd(data[["Q1"]])

### Frequency table

In [None]:
# Summarise every column of a data frame: count, mean and standard deviation.
#
# Args:
#   dataframe: data frame whose columns can be passed to mean() and
#              standard_deviation().
#
# Returns:
#   A data frame with one row per input column and the columns
#   response_for (column name, character), count, mean and
#   standard_deviation (all numeric). A data frame without columns gives a
#   zero-row result with the same four columns.
frequency_table <- function(dataframe) {
    summarise_column <- function(column_name) {
        values <- dataframe[[column_name]]
        # Building a one-row data frame (instead of c(...)) keeps the numbers
        # numeric; c() would coerce them to text next to the column name.
        data.frame(
            response_for = column_name,
            count = length(values),
            mean = mean(values),
            # using our own standard deviation function
            standard_deviation = standard_deviation(values),
            stringsAsFactors = FALSE
        )
    }

    rows <- lapply(names(dataframe), summarise_column)

    if (length(rows) == 0) {
        return(data.frame(
            response_for = character(0),
            count = integer(0),
            mean = numeric(0),
            standard_deviation = numeric(0),
            stringsAsFactors = FALSE
        ))
    }

    # Bind all one-row summaries in a single call rather than growing the
    # result inside a loop.
    do.call(rbind, rows)
}

What do the **names**, **rbind**, and **colnames** functions do?

#### Now let's test our function

In [None]:
# Keep only columns 1 to 10 of the full data set, for all rows
all_questions <- data[, seq_len(10)]

In [None]:
# Build the per-question summary table and display it
freq_table <- frequency_table(all_questions)
freq_table

### Function using factoring

Let's look at our entire data set again

In [None]:
# Display the full data set
data

In [None]:
# Distinct websites in the data. Wrapping the column in factor() makes
# levels() work whether read.csv() produced a factor or plain character
# strings (the default since R 4.0); levels() of a character vector is NULL.
levels(factor(data$website))
factor(data$website)

In [None]:
library("hcitools")
# Score the questionnaire separately for every website / gender combination.
#
# Args:
#   d: data frame with `website` and `gender` columns (factor or character).
#      Columns 1 to 10 are passed to questionnaire.analyse() as the responses.
#
# Returns:
#   A data frame with the columns Website, Gender and Sus_score, one row per
#   website / gender combination; a zero-row data frame when there are no
#   combinations. Each row is assembled with c(), so the values are stored as
#   text.
#   NOTE(review): assumes questionnaire.analyse() returns a single value per
#   group, since three column names are assigned. TODO confirm.
news_score <- function(d) {
    # levels() is NULL for character columns (the read.csv() default since
    # R 4.0), which would skip both loops; use the sorted distinct values then.
    group_values <- function(column) {
        if (is.factor(column)) {
            levels(column)
        } else {
            sort(unique(column))
        }
    }

    res <- NULL
    for (news_firm in group_values(d$website)) {
        for (gender in group_values(d$gender)) {
            # Not named `subset`, so base::subset() is not masked locally.
            responses <- d[d$website == news_firm & d$gender == gender, 1:10]
            result <- questionnaire.analyse(responses, name = "SUS")
            res <- rbind(res, c(news_firm, gender, result))
        }
    }

    if (is.null(res)) {
        return(data.frame(
            Website = character(0),
            Gender = character(0),
            Sus_score = character(0)
        ))
    }

    colnames(res) <- c("Website", "Gender", "Sus_score")
    data.frame(res)
}

In [None]:
# Run the scoring on the full data set and display the result
news_score(data)

In [None]:
# Open the help page for rnorm()
?rnorm



### Function to merge two data frames

#### Let's write a function to generate a data frame with 10 rows
* The x column is the sequence from 1 to 10
* The y column is a randomly generated sequence drawn with mean=0 and standard deviation=1

In [None]:
# Generate a data frame pairing a running index with random normal values.
#
# Args:
#   n: number of rows to generate (defaults to 10).
#
# Returns:
#   A data frame with n rows and two numeric columns: x, the sequence
#   1, 2, ..., n, and y, n values drawn with rnorm(mean = 0, sd = 1).
generate_data <- function(n = 10) {
    data.frame(
        # seq_len() gives an empty sequence for n = 0; as.numeric() keeps x
        # a double column, as it is when built through cbind() with y.
        x = as.numeric(seq_len(n)),
        y = rnorm(n, mean = 0, sd = 1)
    )
}

In [None]:
# Generate two separate random data sets and display each one
gen_data1 <- generate_data()
gen_data1

gen_data2 <- generate_data()
gen_data2

#### rbind finally does the trick of merging the two data frames into one

In [None]:
# Stack the two generated data frames row-wise: gen_data1 followed by gen_data2
merged_data <- rbind(gen_data1, gen_data2)

In [None]:
# Display the combined data frame
merged_data