In [None]:
# Question 1 - Recognise a Special String Label

In [35]:
%%file assignment01_q01.l
/* assignment01_q01.l
 *
 * Classifies every line of standard input as a valid or an invalid label.
 * A valid label starts with an uppercase letter or an asterisk and is
 * followed by any number of letters, digits or underscores.  A line is only
 * reported as valid when the label spans the whole line: otherwise the
 * catch-all pattern matches more text and wins.
 */

%{
#include <stdio.h>
%}

LABEL   [A-Z*][a-zA-Z0-9_]*
REST    .*

%%
{LABEL}     { printf("Valid identifier: %s\n", yytext); }
{REST}      { printf("Invalid identifier: %s\n", yytext); }
\n          { /* line terminator: nothing to report */ }
%%

/* Report that no further input follows once standard input is exhausted. */
int yywrap(void) {
    return 1;
}

/* Run the scanner over standard input until end of file. */
int main(void) {
    while (yylex() != 0) {
        /* no action returns a token; all reporting happens in the rules */
    }
    return 0;
}

Overwriting assignment01_q01.l


In [38]:
# %%
# Regular expression matches identifiers starting with an uppercase letter or * followed by letters, digits, or underscores
# [A-Z\*][a-zA-Z0-9_]*   

# Match any other input (invalid identifiers)
# .*                     

# Ignore newline characters
# \n                    
# %%

In [36]:
%%bash
# Generate the scanner source and compile it.  Abort on the first failing
# step so that gcc never compiles a stale lex.yy.c left by another build.
set -e
flex assignment01_q01.l
gcc lex.yy.c -o a.out

In [37]:
%%bash
# Feed the sample labels to the scanner through a quoted here-document
# instead of relying on ./a.out reading the rest of the cell from the
# shell's own standard input.
./a.out <<'INPUT'
Name23
*aGe9_
54Saman
saman54
INPUT

Valid identifier: Name23
Valid identifier: *aGe9_
Invalid identifier: 54Saman
Invalid identifier: saman54


In [None]:
# Question 2 - Recognise signed floating point numbers

In [119]:
%%file assignment01_q02.l
/* assignment01_q02.l
 *
 * Reports every whitespace-separated token on standard input as either a
 * valid signed floating point number or invalid input.  A valid number is
 * a mandatory sign, one or more digits, a dot, and one or more digits.
 * A token that merely starts with a valid number (for example +56.34.22)
 * is invalid because the catch-all pattern matches more text.
 */

%{
#include <stdio.h>
%}

%%
[+-][0-9]+\.[0-9]+    { printf("Valid signed floating point number: %s\n", yytext); }
[^ \t\n]+             { printf("Invalid input: %s\n", yytext); }
[ \t]+                { /* skip blanks; unmatched they would be echoed by the default rule */ }
\n                    { /* skip line terminators */ }
%%

/* Run the scanner over standard input until end of file. */
int main(void) {
    yylex();
    return 0;
}

/* Report that no further input follows once standard input is exhausted. */
int yywrap(void) {
    return 1;
}

Overwriting assignment01_q02.l


In [120]:
# %%
# Regular expression matches signed floating point numbers with a mandatory sign
# [+-][0-9]+\.[0-9]+    

# Ignore newline characters
# \n                     

# Match any other input (invalid floating point numbers)
# Matches any sequence of characters that are not whitespace (spaces or tabs) or newlines
# [^ \t\n]+              
# %%

In [121]:
%%bash
# Generate the scanner source and compile it.  Abort on the first failing
# step so that gcc never compiles a stale lex.yy.c left by another build.
set -e
flex assignment01_q02.l
gcc lex.yy.c -o a.out

In [122]:
%%bash
# Feed the sample numbers to the scanner through a quoted here-document
# instead of relying on ./a.out reading the rest of the cell from the
# shell's own standard input.  The first sample keeps its trailing blank.
./a.out <<'INPUT'
+9.81 
-456.98
456.45
+56.34.22
INPUT

Valid signed floating point number: +9.81
 Valid signed floating point number: -456.98
Invalid input: 456.45
Invalid input: +56.34.22


In [None]:
# Question 3 - Recognise single-line and multi-line comments

In [144]:
%%file assignment01_q03.l
/* assignment01_q03.l
 *
 * Recognises C-style comments on standard input.  A single-line comment
 * starts with two slashes and runs to the end of the line.  A multi-line
 * comment starts with slash-star and ends at the first star-slash; it may
 * contain asterisks and newlines.  Any other line is reported as invalid,
 * as is a line where extra text follows a comment, because the catch-all
 * pattern then matches more text.
 */

%{
#include <stdio.h>
%}

LINE_COMMENT    \/\/[^\n]*
BLOCK_COMMENT   \/\*([^*]|\*+[^*/])*\*+\/
REST            .*

%%
{LINE_COMMENT}      { printf("Single line comment: %s\n", yytext); }
{BLOCK_COMMENT}     { printf("Multi-line comment: %s\n", yytext); }
\n                  { /* line terminator: nothing to report */ }
{REST}              { printf("Invalid input: %s\n", yytext); }
%%

/* Report that no further input follows once standard input is exhausted. */
int yywrap(void) {
    return 1;
}

/* Run the scanner over standard input until end of file. */
int main(void) {
    while (yylex() != 0) {
        /* no action returns a token; all reporting happens in the rules */
    }
    return 0;
}

Overwriting assignment01_q03.l


In [120]:
# %%
# Regular expression matches single line comments
# Matches single-line comments starting with // and followed by any characters except newline ([^\n]*).
# \/\/[^\n]*                   

# Regular expression matches multi-line comments
# Matches multi-line comments starting with /* and ending with */, allowing asterisks inside the comment body.
# \/\*([^*]|\*+[^*/])*\*+\/    

# Ignore newline characters
# \n                          

# Match any other input (invalid comments)
# .*                           
# %%

In [145]:
%%bash
# Generate the scanner source and compile it.  Abort on the first failing
# step so that gcc never compiles a stale lex.yy.c left by another build.
set -e
flex assignment01_q03.l
gcc lex.yy.c -o a.out

In [146]:
%%bash
# Feed the sample comments to the scanner through a quoted here-document
# instead of relying on ./a.out reading the rest of the cell from the
# shell's own standard input.
./a.out <<'INPUT'
// This is a comment
/* This is a multi line comment */
/ This is a wrong single line
/* this is a wrong multiline comment //
INPUT

Single line comment: // This is a comment
Multi-line comment: /* This is a multi line comment */
Invalid input: / This is a wrong single line
Invalid input: /* this is a wrong multiline comment //


In [None]:
# Question 4 - Recognise email addresses

In [148]:
%%file assignment01_q04.l
/* assignment01_q04.l
 *
 * Classifies every line of standard input as a valid or an invalid email
 * address.  A valid address has the form local@domain where
 *   - local  is one or more letters, digits, underscores, dots or hyphens;
 *   - domain is one or more dot-separated labels of letters, digits or
 *     hyphens, ending in a top-level label of at least two letters.
 * Multi-label domains such as mail.example.com are accepted.  A line is
 * only valid when the address spans the whole line: otherwise the
 * catch-all pattern matches more text and wins.
 */

%{
#include <stdio.h>
%}

%%
[a-zA-Z0-9_.-]+@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.[a-zA-Z]{2,}    { printf("Valid email address: %s\n", yytext); }
.*                                                               { printf("Invalid input: %s\n", yytext); }
\n                                                               { /* skip line terminators */ }
%%

/* Run the scanner over standard input until end of file. */
int main(void) {
    yylex();
    return 0;
}

/* Report that no further input follows once standard input is exhausted. */
int yywrap(void) {
    return 1;
}

Overwriting assignment01_q04.l


In [120]:
# %%
# Regular expression matches valid email addresses
# [a-zA-Z0-9_.-]+@[a-zA-Z0-9-]+\.[a-zA-Z]{2,}  

# [a-zA-Z0-9_.-]+ : Matches one or more letters, digits, underscores, dots, or hyphens for the username.
# @ : Matches the "@" symbol.
# [a-zA-Z0-9-]+ : Matches one or more letters, digits, or hyphens for the domain name.
# \. : Matches the literal dot.
# [a-zA-Z]{2,} : Matches the domain suffix, which is composed of two or more letters.

# Match any other input (invalid email addresses)
# .*                            

# Ignore newline characters
# \n                           
# %%

In [149]:
%%bash
# Generate the scanner source and compile it.  Abort on the first failing
# step so that gcc never compiles a stale lex.yy.c left by another build.
set -e
flex assignment01_q04.l
gcc lex.yy.c -o a.out

In [151]:
%%bash
# Feed the sample addresses to the scanner through a quoted here-document
# instead of relying on ./a.out reading the rest of the cell from the
# shell's own standard input.
./a.out <<'INPUT'
nicola.tesla@dcelectic.com
nicola_tesla@dc-electic.com
nicola.tesla1856@dc-electic.com
nicola.tesla1856@dc*electic.com
INPUT

Valid email address: nicola.tesla@dcelectic.com
Valid email address: nicola_tesla@dc-electic.com
Valid email address: nicola.tesla1856@dc-electic.com
Invalid input: nicola.tesla1856@dc*electic.com


In [None]:
# Question 5 - Recognise Hexadecimal Numbers

In [152]:
%%file assignment01_q05.l
/* assignment01_q05.l
 *
 * Classifies every line of standard input as a valid or an invalid
 * hexadecimal number.  A valid number is the prefix 0x or 0X followed by
 * one or more hexadecimal digits in either case.  A line is only valid
 * when the number spans the whole line: otherwise the catch-all pattern
 * matches more text and wins.
 */

%{
#include <stdio.h>
%}

HEXNUM  0[xX][0-9a-fA-F]+
REST    .*

%%
{HEXNUM}    { printf("Valid hexadecimal number: %s\n", yytext); }
{REST}      { printf("Invalid input: %s\n", yytext); }
\n          { /* line terminator: nothing to report */ }
%%

/* Report that no further input follows once standard input is exhausted. */
int yywrap(void) {
    return 1;
}

/* Run the scanner over standard input until end of file. */
int main(void) {
    while (yylex() != 0) {
        /* no action returns a token; all reporting happens in the rules */
    }
    return 0;
}

Writing assignment01_q05.l


In [156]:
# %%
# Regular expression matches hexadecimal numbers starting with "0x" or "0X" followed by hexadecimal digits
# 0[xX][0-9a-fA-F]+   

# Match any other input (invalid hexadecimal numbers)
# .*                 

# Ignore newline characters
# \n               
# %%

In [157]:
%%bash
# Generate the scanner source and compile it.  Abort on the first failing
# step so that gcc never compiles a stale lex.yy.c left by another build.
set -e
flex assignment01_q05.l
gcc lex.yy.c -o a.out

In [158]:
%%bash
# Feed the sample numbers to the scanner through a quoted here-document
# instead of relying on ./a.out reading the rest of the cell from the
# shell's own standard input.
./a.out <<'INPUT'
0X3F
0Xaf
0x13
0x9ADF
0X0GAf
0x87AZ
INPUT

Valid hexadecimal number: 0X3F
Valid hexadecimal number: 0Xaf
Valid hexadecimal number: 0x13
Valid hexadecimal number: 0x9ADF
Invalid input: 0X0GAf
Invalid input: 0x87AZ


In [None]:
# Question 6 - Recognise Date Formats

In [160]:
%%file assignment01_q06.l
/* assignment01_q06.l
 *
 * Classifies every line of standard input as a valid or an invalid date in
 * DD-MM-YYYY form.  The day must be 01-31, the month 01-12 and the year any
 * four digits.  Day and month are checked independently, so a combination
 * such as 31-02-2000 is still accepted.  A line is only valid when the date
 * spans the whole line: otherwise the catch-all pattern matches more text
 * and wins.
 */

%{
#include <stdio.h>
%}

DATE    (0[1-9]|[12][0-9]|3[01])-(0[1-9]|1[0-2])-[0-9]{4}
REST    .*

%%
{DATE}      { printf("Valid date: %s\n", yytext); }
{REST}      { printf("Invalid input: %s\n", yytext); }
\n          { /* line terminator: nothing to report */ }
%%

/* Report that no further input follows once standard input is exhausted. */
int yywrap(void) {
    return 1;
}

/* Run the scanner over standard input until end of file. */
int main(void) {
    while (yylex() != 0) {
        /* no action returns a token; all reporting happens in the rules */
    }
    return 0;
}

Writing assignment01_q06.l


In [164]:
# %%
# Regular expression matches valid dates in DD-MM-YYYY format
# (0[1-9]|[12][0-9]|3[01])-(0[1-9]|1[0-2])-[0-9]{4}   

# (0[1-9]|[12][0-9]|3[01]): Matches valid days:
#     0[1-9]: Matches days 01 to 09.
#     [12][0-9]: Matches days 10 to 29.
#     3[01]: Matches days 30 and 31.

# (0[1-9]|1[0-2])-: Matches valid months:
#     0[1-9]: Matches months 01 to 09.
#     1[0-2]: Matches months 10 to 12.

# [0-9]{4}: Matches a four-digit year.

# Match any other input (invalid dates)
# .*                                           

# Ignore newline characters
# \n                                          
# %%

In [161]:
%%bash
# Generate the scanner source and compile it.  Abort on the first failing
# step so that gcc never compiles a stale lex.yy.c left by another build.
set -e
flex assignment01_q06.l
gcc lex.yy.c -o a.out

In [162]:
%%bash
# Feed the sample dates to the scanner through a quoted here-document
# instead of relying on ./a.out reading the rest of the cell from the
# shell's own standard input.
./a.out <<'INPUT'
16-06-1970
31-12-1999
01-12-0999
01-13-2000
01-00-2000
00-12-2000
32-12-2000
INPUT

Valid date: 16-06-1970
Valid date: 31-12-1999
Valid date: 01-12-0999
Invalid input: 01-13-2000
Invalid input: 01-00-2000
Invalid input: 00-12-2000
Invalid input: 32-12-2000


In [None]:
# Question 7 - Recognise HTML formats

In [169]:
%%file assignment01_q07.l
/* assignment01_q07.l
 *
 * Reports the HTML opening tags, closing tags and comments found on
 * standard input and silently drops everything else.
 *
 * Flex has no lazy quantifiers, so a comment cannot be written as
 * "anything, as little as possible, up to the terminator": that form is
 * greedy and would swallow everything between the first comment opener and
 * the last comment terminator in the input.  The comment pattern below
 * instead spells out "text that does not contain the terminator":
 *   [^\-]          any character other than a dash
 *   -[^\-]         a single dash followed by a non-dash
 *   --+[^\->]      two or more dashes not followed by '>'
 * repeated any number of times, then two or more dashes and '>'.
 */

%{
#include <stdio.h>
%}

%%
\<[a-zA-Z0-9_]+[^>]*\>                      { printf("Opening tag: %s\n", yytext); }
\<\/[a-zA-Z0-9_]+\>                         { printf("Closing tag: %s\n", yytext); }
"<!--"([^\-]|-[^\-]|--+[^\->])*--+">"       { printf("Comment: %s\n", yytext); }
.|\n                                        { /* drop everything that is not a tag or a comment */ }
%%

/* Run the scanner over standard input until end of file. */
int main(void) {
    yylex();
    return 0;
}

/* Report that no further input follows once standard input is exhausted. */
int yywrap(void) {
    return 1;
}

Overwriting assignment01_q07.l


In [172]:
# %%
# Regular expression to recognize opening tags
# \<[a-zA-Z0-9_]+[^>]*\>     

# Regular expression to recognize closing tags
# \<\/[a-zA-Z0-9_]+\>        

# Regular expression to recognize comments
# \<!--(.|\n)*?--\>          

# Ignore everything else
# .|\n                       
# %%

In [173]:
%%bash
# Generate the scanner source and compile it.  Abort on the first failing
# step so that gcc never compiles a stale lex.yy.c left by another build.
set -e
flex assignment01_q07.l
gcc lex.yy.c -o a.out

In [174]:
%%bash
# Feed the sample markup to the scanner through a quoted here-document
# instead of relying on ./a.out reading the rest of the cell from the
# shell's own standard input.  Quoting the delimiter keeps the shell from
# expanding anything inside the markup.
./a.out <<'INPUT'
<HTML>
<script>
<div class="container">
<script>
 const myString = "<script></script>"
</script>
<div class="container">
 <!-- <img src="cat.jpg" alt="big cat" > -->
</div>
</HTML>
INPUT

Opening tag: <HTML>
Opening tag: <script>
Opening tag: <div class="container">
Opening tag: <script>
Opening tag: <script>
Closing tag: </script>
Closing tag: </script>
Opening tag: <div class="container">
Comment: <!-- <img src="cat.jpg" alt="big cat" > -->
Closing tag: </div>
Closing tag: </HTML>


In [None]:
# Question 8 - Recognise URLs

In [177]:
%%file assignment01_q08.l
/* assignment01_q08.l
 *
 * Classifies every line of standard input as a valid or an invalid URL.
 * A valid URL is http:// or https://, a host made of letters, digits, dots
 * or hyphens, an optional :port of digits, and an optional path that starts
 * with a slash and contains letters, digits, dots, underscores, slashes or
 * hyphens.
 *
 * The URL pattern carries no line anchors.  An end-of-line anchor only
 * matches in front of a newline, so a URL on a final line without a
 * trailing newline would be reported as invalid.  Whole-line matching is
 * already guaranteed by the catch-all rule: every match consumes a full
 * line, so scanning always resumes at the start of a line, and whenever
 * extra text follows a URL the catch-all matches more text and wins.
 */

%{
#include <stdio.h>
%}

%%
https?:\/\/[a-zA-Z0-9.-]+(:[0-9]+)?(\/[a-zA-Z0-9._/-]*)?    { printf("Valid URL: %s\n", yytext); }
.*                                                          { printf("Invalid input: %s\n", yytext); }
\n                                                          { /* skip line terminators */ }
%%

/* Run the scanner over standard input until end of file. */
int main(void) {
    yylex();
    return 0;
}

/* Report that no further input follows once standard input is exhausted. */
int yywrap(void) {
    return 1;
}

Overwriting assignment01_q08.l


In [180]:
# %%
# Regular expression to recognize URLs with optional port number and path
# ^(http|https):\/\/[a-zA-Z0-9.-]+(:[0-9]+)?(\/[a-zA-Z0-9._/-]*)?$ 

# ^(http|https): Matches the start of the line with "http" or "https".
# :\/\/: Matches the "://" separator.
# [a-zA-Z0-9.-]+: Matches letters, digits, dots, or hyphens for the domain name.
# (:[0-9]+)?: Optionally matches a colon followed by one or more digits for the port number.
# (\/[a-zA-Z0-9._/-]*)?$: Optionally matches a slash followed by letters, digits, dots, underscores, slashes, or hyphens for the path.

# Ignore newline characters
# \n                            
# Match any other input (invalid URLs)
# .*                             
# %%

In [178]:
%%bash
# Generate the scanner source and compile it.  Abort on the first failing
# step so that gcc never compiles a stale lex.yy.c left by another build.
set -e
flex assignment01_q08.l
gcc lex.yy.c -o a.out

In [179]:
%%bash
# Feed the sample URLs to the scanner through a quoted here-document
# instead of relying on ./a.out reading the rest of the cell from the
# shell's own standard input.
./a.out <<'INPUT'
http://google.com
https://google.com:6060
https://google.com:6060/gemini
INPUT

Valid URL: http://google.com
Valid URL: https://google.com:6060
Valid URL: https://google.com:6060/gemini
