/
ddr3_memory_controller.v
4264 lines (3363 loc) · 140 KB
/
ddr3_memory_controller.v
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Credit : https://github.com/MartinGeisse/esdk2/blob/master/simsyn/orange-crab/src/mahdl/name/martingeisse/esdk/riscv/orange_crab/ddr3/RamController.mahdl
// Will simulate loopback transaction (write some data into RAM, then read those data back from RAM)
// with the verilog simulation model provided by Micron
// https://www.micron.com/products/dram/ddr3-sdram/part-catalog/mt41j128m16jt-125
// Later, formal verification will proceed with using Micron simulation model
// ---------------------------------------------------------------------------
// Build-time configuration.
// The macros below are tested with `ifdef/`ifndef throughout this file, so
// their order matters: a macro must be defined before the first test of it.
// FORMAL is never defined here; it is only tested, so it has to be supplied
// from outside this file when needed.
// ---------------------------------------------------------------------------
// With SYNTHESIS defined, the simulation-only macros in the `ifndef below stay undefined.
`define SYNTHESIS 1
`define VIVADO 1 // for 7-series and above
`define HIGH_SPEED 1 // Minimum DDR3-1600 operating frequency >= 303MHz
`ifndef SYNTHESIS
`define MICRON_SIM 1 // micron simulation model
`define TESTBENCH 1 // for both micron simulation model and Xilinx ISIM simulator
`endif
// USE_x16 selects the 16-bit DQ port set (ldqs/udqs, ldm/udm) in the module header.
`define USE_x16 1
// USE_SERDES adds the SERDES_RATIO parameter and widens data_to_ram/data_from_ram.
`define USE_SERDES 1
// `define TDQS 1
// Exactly one RAM_SIZE_* macro is expected; it selects ADDRESS_BITWIDTH and the tRFC/tXPR values.
//`define RAM_SIZE_1GB
`define RAM_SIZE_2GB
//`define RAM_SIZE_4GB
// FPGA vendor selection, only made for a non-FORMAL, HIGH_SPEED build.
`ifndef FORMAL
`ifdef HIGH_SPEED
// for lattice ECP5 FPGA
//`define LATTICE 1
// for Xilinx Spartan-6 FPGA
`define XILINX 1
// for Altera MAX-10 FPGA
//`define ALTERA 1
`endif
`endif
//`ifndef XILINX
/* verilator lint_off VARHIDDEN */
// Used only through $clog2() to size the main_state port and the state registers.
// NOTE(review): the STATE_* encodings declared inside the module go up to 24
// (STATE_IDLE). $clog2(23) = 5 bits still holds 24, but this count looks stale.
localparam NUM_OF_DDR_STATES = 23;
// TIME_TZQINIT = 512
// See also 'COUNTER_INCREMENT_VALUE' on why some of the large timing variables are not used in this case
// Sizes the wait_count port as [$clog2(MAX_WAIT_COUNT):0].
localparam MAX_WAIT_COUNT = 512;
/* verilator lint_on VARHIDDEN */
//`endif
// write data to RAM and then read them back from RAM
`define LOOPBACK 1
// USE_ILA (extra debug output ports) is enabled only for a LOOPBACK build that is
// neither FORMAL nor MICRON_SIM.
`ifdef LOOPBACK
`ifndef FORMAL
`ifndef MICRON_SIM
// data loopback requires internal logic analyzer (ILA) capability to check data integrity
`define USE_ILA 1
`endif
`endif
`endif
// https://www.systemverilog.io/ddr4-basics
module ddr3_memory_controller
#(
parameter NUM_OF_WRITE_DATA = 32, // 32 pieces of data are to be written to DRAM
parameter NUM_OF_READ_DATA = 32, // 32 pieces of data are to be read from DRAM
parameter DATA_BURST_LENGTH = 8, // eight data transfers per burst activity, please modify MR0 setting if none other than BL8
`ifdef USE_SERDES
// why 8 ? because of FPGA development board is using external 50 MHz crystal
// and the minimum operating frequency for Micron DDR3 memory is 303MHz
parameter SERDES_RATIO = 8,
`endif
parameter PICO_TO_NANO_CONVERSION_FACTOR = 1000, // 1ns = 1000ps
`ifndef HIGH_SPEED
parameter PERIOD_MARGIN = 10, // 10ps margin
parameter MAXIMUM_CK_PERIOD = 3300-PERIOD_MARGIN, // 3300ps which is defined by Micron simulation model
parameter DIVIDE_RATIO = 4, // master 'clk' signal is divided by 4 for DDR outgoing 'ck' signal, it is for 90 degree phase shift purpose.
parameter DIVIDE_RATIO_HALVED = (DIVIDE_RATIO >> 1),
// host clock period in ns
// clock period of 'clk' = 0.8225ns , clock period of 'ck' = 3.3ns
parameter CLK_PERIOD = $itor(MAXIMUM_CK_PERIOD/DIVIDE_RATIO)/$itor(PICO_TO_NANO_CONVERSION_FACTOR),
`else
parameter CLK_PERIOD = 20, // 20ns, 50MHz
parameter CLK_SERDES_PERIOD = 12, // 12ns, 83.333MHz
`endif
`ifdef TESTBENCH
`ifndef MICRON_SIM
parameter PERIOD_MARGIN = 10, // 10ps margin
parameter MAXIMUM_CK_PERIOD = 3300-PERIOD_MARGIN, // 3300ps which is defined by Micron simulation model
parameter DIVIDE_RATIO = 4, // master 'clk' signal is divided by 4 for DDR outgoing 'ck' signal, it is for 90 degree phase shift purpose.
`endif
`endif
`ifdef HIGH_SPEED
parameter CK_PERIOD = 3, // 333.333MHz from PLL, 1/333.333MHz = 3ns
`else
parameter CK_PERIOD = (CLK_PERIOD*DIVIDE_RATIO),
`endif
// for STATE_IDLE transition into STATE_REFRESH
// tREFI = 65*tRFC calculated using info from the Micron datasheet, so tREFI > 8 * tRFC
// So it is entirely possible to do all 8 refresh commands inside one tREFI cycle
// since each refresh command will take tRFC cycle to finish
// See also https://www.systemverilog.io/understanding-ddr4-timing-parameters#refresh
/* verilator lint_off VARHIDDEN */
parameter MAX_NUM_OF_REFRESH_COMMANDS_POSTPONED = 8, // 9 commands. one executed immediately, 8 more enqueued.
/* verilator lint_on VARHIDDEN */
`ifdef USE_x16
parameter DQS_BITWIDTH = 2,
`ifdef RAM_SIZE_1GB
parameter ADDRESS_BITWIDTH = 13,
`elsif RAM_SIZE_2GB
parameter ADDRESS_BITWIDTH = 14,
`elsif RAM_SIZE_4GB
parameter ADDRESS_BITWIDTH = 15,
`endif
`else
parameter DQS_BITWIDTH = 1,
`ifdef RAM_SIZE_1GB
parameter ADDRESS_BITWIDTH = 14,
`elsif RAM_SIZE_2GB
parameter ADDRESS_BITWIDTH = 15,
`elsif RAM_SIZE_4GB
parameter ADDRESS_BITWIDTH = 16,
`endif
`endif
parameter BANK_ADDRESS_BITWIDTH = 3, // 8 banks, and $clog2(8) = 3
`ifdef USE_x16
parameter DQ_BITWIDTH = 16 // bitwidth for each piece of data
`else
parameter DQ_BITWIDTH = 8 // bitwidth for each piece of data
`endif
)
(
// these are FPGA internal signals
input clk,
input reset,
input write_enable, // write to DDR memory
input read_enable, // read from DDR memory
input [BANK_ADDRESS_BITWIDTH+ADDRESS_BITWIDTH-1:0] i_user_data_address, // the DDR memory address for which the user wants to write/read the data
`ifdef USE_SERDES
input [DQ_BITWIDTH*SERDES_RATIO-1:0] data_to_ram, // data for which the user wants to write to DDR
output [DQ_BITWIDTH*SERDES_RATIO-1:0] data_from_ram, // the requested data from DDR RAM after read operation
`else
// TWO pieces of data bundled together due to double-data-rate requirement of DQ signal
input [(DQ_BITWIDTH << 1)-1:0] data_to_ram, // data to be written to DDR RAM
output [(DQ_BITWIDTH << 1)-1:0] data_from_ram, // the requested data being read from DDR RAM read operation
`endif
input [$clog2(MAX_NUM_OF_REFRESH_COMMANDS_POSTPONED):0] user_desired_extra_read_or_write_cycles, // for the purpose of postponing refresh commands
`ifndef HIGH_SPEED
output clk_slow_posedge, // for dq phase shifting purpose
output clk180_slow_posedge, // for dq phase shifting purpose
`endif
// these are to be fed into external DDR3 memory
output [ADDRESS_BITWIDTH-1:0] address,
output [BANK_ADDRESS_BITWIDTH-1:0] bank_address,
`ifdef HIGH_SPEED
output ck_obuf, // CK
output ck_n_obuf, // CK#
`else
output ck, // CK
output ck_n, // CK#
`endif
`ifdef TESTBENCH
output ck_90,
output ck_270,
output [DQ_BITWIDTH-1:0] dq_iobuf_enable,
output ldqs_iobuf_enable,
output udqs_iobuf_enable,
`endif
output reg data_read_is_ongoing,
`ifdef HIGH_SPEED
output clk_serdes_data, // 83.333MHz with 270 phase shift
output clk_serdes, // 83.333MHz with 45 phase shift
output ck_180, // 333.333MHz with 180 phase shift
output reg locked_previous,
output need_to_assert_reset,
`endif
output ck_en, // CKE
output cs_n, // chip select signal
output odt, // on-die termination
output ras_n, // RAS#
output cas_n, // CAS#
output we_n, // WE#
output reset_n,
inout [DQ_BITWIDTH-1:0] dq, // Data input/output
// for coordinating with the user application on when to start DRAM write and read operation
output reg [$clog2(NUM_OF_DDR_STATES)-1:0] main_state,
output reg [$clog2(MAX_WAIT_COUNT):0] wait_count,
// Xilinx ILA could not probe port IO of IOBUF primitive, but could probe rest of the ports (ports I, O, and T)
`ifdef USE_ILA
output [DQ_BITWIDTH-1:0] dq_w, // port I
output [DQ_BITWIDTH-1:0] dq_r, // port O
output low_Priority_Refresh_Request,
output high_Priority_Refresh_Request,
// to propagate 'write_enable' and 'read_enable' signals during STATE_IDLE to STATE_WRITE and STATE_READ
output reg write_is_enabled,
output reg read_is_enabled,
output reg [$clog2(MAX_NUM_OF_REFRESH_COMMANDS_POSTPONED):0] refresh_Queue,
`ifndef HIGH_SPEED
output reg [($clog2(DIVIDE_RATIO_HALVED)-1):0] dqs_counter,
`endif
output dqs_rising_edge,
output dqs_falling_edge,
`endif
`ifdef USE_x16
output ldm, // lower-byte data mask
output udm, // upper-byte data mask
inout ldqs, // lower byte data strobe
inout ldqs_n,
inout udqs, // upper byte data strobe
inout udqs_n
`else
inout [DQS_BITWIDTH-1:0] dqs, // Data strobe
inout [DQS_BITWIDTH-1:0] dqs_n,
// driven to high-Z if TDQS termination function is disabled
// according to TN-41-06: DDR3 Termination Data Strobe (TDQS)
// Please as well look at TN-41-04: DDR3 Dynamic On-Die Termination Operation
`ifdef TDQS
inout [DQS_BITWIDTH-1:0] tdqs, // Termination data strobe, but can act as data-mask (DM) when TDQS function is disabled
`else
output [DQS_BITWIDTH-1:0] tdqs,
`endif
inout [DQS_BITWIDTH-1:0] tdqs_n
`endif
);
// When writes are done on bus with a data-width > 8, you are doing a single write for multiple bytes and
// then need to be able to indicate which bytes are valid and need to be updated in memory,
// which bytes should be ignored. That's the purpose of DM.
// It is allowed to have DM always pulled low (some boards are wired like this), but this makes you lose
// the byte granularity on writes, your granularity is then on DRAM's burst words.
// DM is just here to have byte granularity on the write accesses
// (ie you only want to update some bytes of the DRAM word)
// x8 build without the TDQS function: the tdqs output is held low on every
// bit, so it behaves as a data mask (DM) that never masks a byte.
`ifndef USE_x16
`ifndef TDQS
assign tdqs = {DQS_BITWIDTH{1'b0}}; // acts as DM
`endif
`endif
/*
reg previous_clk_en;
always @(posedge clk)
begin
if(reset) previous_clk_en <= 0;
previous_clk_en <= clk_en;
end
*/
// Commands truth table extracted from Micron specification document
/*
localparam MRS = (previous_clk_en) & (ck_en) & (~cs_n) & (~ras_n) & (~cas_n) & (~we_n);
localparam REF = (previous_clk_en) & (ck_en) & (~cs_n) & (~ras_n) & (~cas_n) & (we_n);
localparam PRE = (previous_clk_en) & (ck_en) & (~cs_n) & (~ras_n) & (cas_n) & (~we_n) & (~A10);
localparam PREA = (previous_clk_en) & (ck_en) & (~cs_n) & (~ras_n) & (cas_n) & (~we_n) & (A10);
localparam ACT = (previous_clk_en) & (ck_en) & (~cs_n) & (~ras_n) & (cas_n) & (we_n);
localparam WR = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (~cas_n) & (~we_n) & (~A10);
localparam WRS4 = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (~cas_n) & (~we_n) & (~A12) & (~A10);
localparam WRS8 = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (~cas_n) & (~we_n) & (A12) & (~A10);
localparam WRAP = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (~cas_n) & (~we_n) & (A10);
localparam WRAPS4 = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (~cas_n) & (~we_n) & (~A12) & (A10);
localparam WRAPS8 = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (~cas_n) & (~we_n) & (A12) & (A10);
localparam RD = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (~cas_n) & (we_n) & (~A10);
localparam RDS4 = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (~cas_n) & (we_n) & (~A12) & (~A10);
localparam RDS8 = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (~cas_n) & (we_n) & (A12) & (~A10);
localparam RDAP = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (~cas_n) & (we_n) & (A10);
localparam RDAPS4 = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (~cas_n) & (we_n) & (~A12) & (A10);
localparam RDAPS8 = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (~cas_n) & (we_n) & (A12) & (A10);
localparam NOP = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (cas_n) & (we_n);
localparam DES = (previous_clk_en) & (ck_en) & (cs_n);
localparam PDE = (previous_clk_en) & (~ck_en) & (~cs_n) & (ras_n) & (cas_n) & (we_n);
localparam PDX = (~previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (cas_n) & (we_n);
localparam ZQCL = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (cas_n) & (~we_n) & (A10);
localparam ZQCS = (previous_clk_en) & (ck_en) & (~cs_n) & (ras_n) & (cas_n) & (~we_n) & (~A10);
*/
// for the purpose of calculating DDR timing parameters such as tXPR, tRFC, ...
// (wait_count and main_state themselves are declared as 'output reg' ports in the
// module header, which is why their local declarations are commented out here)
//reg [$clog2(MAX_WAIT_COUNT):0] wait_count;
// to synchronize signal in clk_serdes domain to ck_180 domain
wire [$clog2(MAX_WAIT_COUNT):0] wait_count_ck_180;
wire [$clog2(NUM_OF_DDR_STATES)-1:0] main_state_ck_180;
//reg [$clog2(NUM_OF_DDR_STATES)-1:0] main_state;
// state values captured for later comparison, one per clock domain copy
reg [$clog2(NUM_OF_DDR_STATES)-1:0] previous_main_state;
reg [$clog2(NUM_OF_DDR_STATES)-1:0] previous_main_state_ck_180;
// for PLL lock issue
reg [$clog2(NUM_OF_DDR_STATES)-1:0] state_to_be_restored;
// ---------------------------------------------------------------------------
// Encodings of the main state machine.
// The values are hand-assigned and NOT in numeric order: 2, 3 and 4 are used by
// the read-path states near the end of this list, 11 is unused, and the largest
// value is 24. All values fit in the 5-bit state registers declared above
// ($clog2(NUM_OF_DDR_STATES) with NUM_OF_DDR_STATES = 23).
// Do not renumber without checking every comparison against these constants.
// ---------------------------------------------------------------------------
localparam STATE_RESET = 0;
localparam STATE_RESET_FINISH = 1;
localparam STATE_ZQ_CALIBRATION = 23;
localparam STATE_IDLE = 24;
localparam STATE_ACTIVATE = 5;
localparam STATE_WRITE = 6;
localparam STATE_WRITE_AP = 7;
localparam STATE_WRITE_DATA = 8;
localparam STATE_READ = 9;
localparam STATE_READ_AP = 10;
localparam STATE_READ_DATA = 3; // smaller value to solve setup timing issue due to lesser comparison hardware
localparam STATE_PRECHARGE = 12;
localparam STATE_REFRESH = 13;
localparam STATE_WRITE_LEVELLING = 14;
localparam STATE_INIT_CLOCK_ENABLE = 15;
localparam STATE_INIT_MRS_2 = 16;
localparam STATE_INIT_MRS_3 = 17;
localparam STATE_INIT_MRS_1 = 18;
localparam STATE_INIT_MRS_0 = 19;
localparam STATE_WAIT_AFTER_MPR = 20;
localparam STATE_MRS3_TO_MRS1 = 21;
localparam STATE_PLL_LOCK_ISSUE = 22;
localparam STATE_READ_ACTUAL = 2;
localparam STATE_READ_AP_ACTUAL = 4;
// https://www.systemverilog.io/understanding-ddr4-timing-parameters
//
// DDR3 timing constants.
// Constants written as <nanoseconds>/CLK_SERDES_PERIOD or <CK cycles>*CK_PERIOD/CLK_SERDES_PERIOD
// are expressed in units of one CLK_SERDES_PERIOD (both periods are parameters in ns).
//
// NOTE(review): integer operands divide with truncation (160/12 gives 13, 35/12 gives 2), and the
// real-valued expressions (13.91/..., 0.195/...) are rounded when converted to the vector type
// (13.91/12 gives 1, 0.195/12 gives 0). Several "minimum" times therefore come out shorter than the
// figure quoted in their comment with the default CLK_SERDES_PERIOD of 12. Confirm that the state
// machine using these constants adds enough extra cycles, or round these up.
//
// Largest interval the constants must represent: same expression as TIME_INITIAL_CK_INACTIVE
localparam MAX_TIMING = (500000/CLK_SERDES_PERIOD); // just for initial development stage, will refine the value later
// just to avoid https://github.com/YosysHQ/yosys/issues/2718
`ifndef XILINX
localparam FIXED_POINT_BITWIDTH = $clog2(MAX_TIMING);
`else
localparam FIXED_POINT_BITWIDTH = 18;
`endif
`ifdef FORMAL
// just to make the cover() spends lesser time to complete
localparam TIME_INITIAL_RESET_ACTIVE = 2;
localparam TIME_INITIAL_CK_INACTIVE = 2;
localparam TIME_TZQINIT = 2;
localparam TIME_RL = 2;
localparam TIME_WL = 2;
localparam TIME_TBURST = 2;
localparam TIME_TXPR = 2;
localparam TIME_TMRD = 2;
localparam TIME_TMOD = 2;
localparam TIME_TRFC = 2;
localparam TIME_TREFI = 2;
localparam TIME_TDLLK = 2;
// TIME_TDAL below is computed unconditionally from TIME_TWR and TIME_TRP, so both must also
// exist in a FORMAL build; without them the FORMAL configuration cannot elaborate.
localparam TIME_TWR = 2;
localparam TIME_TRP = 2;
`else
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_INITIAL_RESET_ACTIVE = (200000/CLK_SERDES_PERIOD); // 200us = 200000ns, After the power is stable, RESET# must be LOW for at least 200us to begin the initialization process.
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_INITIAL_CK_INACTIVE = (500000/CLK_SERDES_PERIOD); // 500us = 500000ns, After RESET# transitions HIGH, wait 500us (minus one clock) with CKE LOW.
// tRFC and tXPR depend on the device density selected by the RAM_SIZE_* macro
`ifdef RAM_SIZE_1GB
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TRFC = (110/CLK_SERDES_PERIOD); // minimum 110ns, Delay between the REFRESH command and the next valid command, except DES
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TXPR = ((10+110)/CLK_SERDES_PERIOD); // https://i.imgur.com/SAqPZzT.png, min. (greater of(10ns+tRFC = 120ns, 5 clocks))
`elsif RAM_SIZE_2GB
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TRFC = (160/CLK_SERDES_PERIOD); // minimum 160ns
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TXPR = ((10+160)/CLK_SERDES_PERIOD); // https://i.imgur.com/SAqPZzT.png, min. (greater of(10ns+tRFC = 170ns, 5 clocks))
`elsif RAM_SIZE_4GB
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TRFC = (260/CLK_SERDES_PERIOD); // minimum 260ns
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TXPR = ((10+260)/CLK_SERDES_PERIOD); // https://i.imgur.com/SAqPZzT.png, min. (greater of(10ns+tRFC = 270ns, 5 clocks))
`endif
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TREFI = (7800/CLK_SERDES_PERIOD); // 7.8us = 7800ns, Maximum average periodic refresh
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TRAS = (35/CLK_SERDES_PERIOD); // minimum 35ns, ACTIVATE-to-PRECHARGE command period
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TRP = (13.91/CLK_SERDES_PERIOD); // minimum 13.91ns, Precharge time. The banks have to be precharged and idle for tRP before a REFRESH command can be applied
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TRCD = (13.91/CLK_SERDES_PERIOD); // minimum 13.91ns, Time RAS-to-CAS delay, ACT to RD/WR
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TWR = (15/CLK_SERDES_PERIOD); // Minimum 15ns, Write recovery time is the time interval between the end of a write data burst and the start of a precharge command. It allows sense amplifiers to restore data to cells.
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TFAW = (50/CLK_SERDES_PERIOD); // Minimum 50ns, Why Four Activate Window, not Five or Eight Activate Window ? For limiting high current drain over the period of tFAW time interval
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TIS = (0.195/CLK_SERDES_PERIOD); // Minimum 195ps, setup time
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TDLLK = (512*CK_PERIOD/CLK_SERDES_PERIOD); // tDLLK = 512 clock cycles, DLL locking time
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TZQINIT = (512*CK_PERIOD/CLK_SERDES_PERIOD); // tZQINIT = 512 clock cycles, ZQCL command calibration time for POWER-UP and RESET operation
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_RL = (5*CK_PERIOD/CLK_SERDES_PERIOD); // read latency of 5 CK cycles: RL = AL + CL with AL = 0 for simplicity
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_WL = (5*CK_PERIOD/CLK_SERDES_PERIOD); // write latency of 5 CK cycles: WL = AL + CWL with AL = 0 for simplicity
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TBURST = ((DATA_BURST_LENGTH >> 1)*CK_PERIOD/CLK_SERDES_PERIOD); // each read or write commands will work on 8 different pieces of consecutive data. In other words, burst length is 8, and tburst = burst_length/2 with double data rate mechanism
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TMRD = (4*CK_PERIOD/CLK_SERDES_PERIOD); // tMRD = 4 clock cycles, Time MRS to MRS command Delay
localparam [FIXED_POINT_BITWIDTH-1:0] TIME_TMOD = (12*CK_PERIOD/CLK_SERDES_PERIOD); // tMOD = 12 clock cycles, Time MRS to non-MRS command Delay
`endif
// The constants below are shared by the FORMAL and non-FORMAL configurations.
localparam TIME_TWTR = 4; // Delay from start of internal WRITE transaction to internal READ command, MIN = greater of 4CK or 7.5ns;
localparam TIME_TDAL = TIME_TWR + TIME_TRP; // Auto precharge write recovery + precharge time
localparam TIME_TRPRE = 1; // this is for read pre-amble. It is the time between when the data strobe goes from non-valid (HIGH) to valid (LOW, initial drive level).
localparam TIME_TRPST = 1; // this is for read post-amble. It is the time from when the last valid data strobe to when the strobe goes to HIGH, non-drive level.
localparam TIME_TWPRE = 1; // this is for write pre-amble. It is the time between when the data strobe goes from non-valid (HIGH) to valid (LOW, initial drive level).
localparam TIME_TWPST = 1; // this is for write post-amble. It is the time from when the last valid data strobe to when the strobe goes to HIGH, non-drive level.
localparam TIME_TMPRR = 1; // this is for MPR System Read Calibration. It is the time between MULTIPURPOSE REGISTER READ burst end until mode register set for multipurpose register exit
localparam TIME_WRITE_COMMAND_TO_DQS_VALID = TIME_WL-TIME_TWPRE; // time between write command and valid DQS
localparam TIME_TCCD = (4*CK_PERIOD/CLK_SERDES_PERIOD); // CAS#-to-CAS# command delay, applicable for consecutive DRAM write or read operations
// Numeric identifiers of the four mode registers (MR0..MR3).
localparam ADDRESS_FOR_MODE_REGISTER_0 = 0;
localparam ADDRESS_FOR_MODE_REGISTER_1 = 1;
localparam ADDRESS_FOR_MODE_REGISTER_2 = 2;
localparam ADDRESS_FOR_MODE_REGISTER_3 = 3;
// Mode register 0 (MR0) settings
// Each constant below is one field of the register; how the fields are assembled
// into the address bus value is not shown in this part of the file.
localparam MR0 = 2'b00; // Mode register set 0
localparam PRECHARGE_PD = 1'b1; // DLL on
localparam WRITE_RECOVERY = 3'b010; // WR = 6 , WR (cycles) = roundup (tWR [ns]/tCK [ns])
localparam DLL_RESET = 1'b1;
// The CAS latency field is split into two parts that are concatenated below (4'b0010).
localparam CAS_LATENCY_46 = 3'b001;
localparam CAS_LATENCY_2 = 1'b0;
localparam CAS_LATENCY = {CAS_LATENCY_46, CAS_LATENCY_2}; // CL = 5
localparam READ_BURST_TYPE = 1'b0; // sequential burst
localparam BURST_LENGTH = 2'b0; // Fixed BL8
// Mode register 1 (MR1) settings
localparam MR1 = 2'b01; // Mode register set 1
localparam Q_OFF = 1'b0; // Output enabled
// NOTE: this localparam shares its name with the `TDQS preprocessor macro tested elsewhere
// in the file; macros and parameters live in separate namespaces, so they do not collide.
localparam TDQS = 1'b0; // TDQS disabled (x8 configuration only)
// The termination field is built from three single-bit pieces (all zero here).
localparam RTT_9 = 1'b0;
localparam RTT_6 = 1'b0;
localparam RTT_2 = 1'b0;
localparam RTT = {RTT_9, RTT_6, RTT_2}; // on-die termination resistance value
localparam WL = 1'b0; // Write levelling disabled
// The drive-strength field is built from two single-bit pieces (2'b01).
localparam ODS_5 = 1'b0;
localparam ODS_2 = 1'b1;
localparam ODS = {ODS_5, ODS_2}; // Output drive strength set at 34 ohm
localparam AL = 2'b0; // Additive latency disabled
localparam DLL_EN = 1'b0; // DLL is enabled
// Mode register 3 (MR3) settings
localparam MPR_EN = 1'b1; // enables or disables Dataflow from MPR, in most cases it is a must to enable
localparam MPR_READ_FUNCTION = 2'b0; // Predefined data pattern for READ synchronization
localparam MPR_BITWIDTH_COMBINED = 3; // the three least-significant-bits of MR3
// Bit positions within the address bus (indices, not values).
localparam A10 = 10; // address bit for auto-precharge option
localparam A12 = 12; // address bit for burst-chop option
// presumably the refresh_Queue depth at which a refresh becomes high priority - TODO confirm against its use
localparam HIGH_REFRESH_QUEUE_THRESHOLD = 4;
// Flags used within the MR3 finite state machine
reg MPR_ENABLE;
reg MPR_Read_had_finished;

// Un-serialized DQ data. When USE_ILA is defined these two nets are module
// output ports instead, so they are only declared locally otherwise.
`ifndef USE_ILA
wire [DQ_BITWIDTH-1:0] dq_w; // outgoing data stream (NOT serialized)
wire [DQ_BITWIDTH-1:0] dq_r; // incoming data stream (NOT serialized)
`endif

// Data strobe nets: *_r are incoming from the RAM, *_w are outgoing to the RAM
`ifdef USE_x16
// lower byte lane
wire ldqs_r;
wire ldqs_n_r;
wire ldqs_w;
wire ldqs_n_w;
// upper byte lane
wire udqs_r;
wire udqs_n_r;
wire udqs_w;
wire udqs_n_w;
`else
wire dqs_r;
wire dqs_n_r;
wire dqs_w;
wire dqs_n_w;
`endif
`ifndef HIGH_SPEED
// Purposes of Clock divider:
// 1. for developing correct logic first before making the DDR memory controller works in higher frequency,
// 2. to perform 90 degree phase shift on DQ signal with relative to DQS signal during data writing stage
// 3. to perform 180 degree phase shift (DDR mechanism of both DQS and DQ signals need to work on
// both posedge and negedge clk) for the next consecutive data
// See https://i.imgur.com/dnDwZul.png or
// https://www.markimicrowave.com/blog/top-7-ways-to-create-a-quadrature-90-phase-shift/
// See https://i.imgur.com/ZnBuofE.png or
// https://patentimages.storage.googleapis.com/0e/94/46/6fdcafc946e940/US5297181.pdf#page=3
// Will use digital PLL or https://stackoverflow.com/a/50172237/8776167 in later stage of the project
// See https://www.edaplayground.com/x/gXC for waveform simulation of the clock divider
// Divided clock, its cycle counter and the counter's terminal-count flag
reg clk_slow;
reg counter_reset;
reg [($clog2(DIVIDE_RATIO_HALVED)-1):0] counter;

// counter_reset is forced high during reset and otherwise registers whether
// 'counter' currently equals (DIVIDE_RATIO_HALVED - 1).
always @(posedge clk)
begin
    if(reset)
        counter_reset <= 1;
`ifdef XILINX
    else
        counter_reset <= (counter == DIVIDE_RATIO_HALVED[0 +: 1] - 1'b1);
`else
    else
        counter_reset <= (counter == DIVIDE_RATIO_HALVED[0 +: $clog2(DIVIDE_RATIO_HALVED)] - 1'b1);
`endif
end

// Whenever counter_reset is high the counter restarts from 1 and clk_slow
// toggles; otherwise the counter increments and clk_slow holds its value.
always @(posedge clk)
begin
    if(reset)
    begin
        counter <= 0;
        clk_slow <= 1;
    end
    else if(counter_reset)
    begin
        counter <= 1;
        clk_slow <= ~clk_slow;
    end
    else
    begin
        counter <= counter + 1;
    end
end

// The divided clock and its complement are the DDR clock pair
assign ck = clk_slow;
assign ck_n = ~clk_slow;

// Phase information decoded from the pair (clk_slow, counter_reset)
wire clk90_slow_is_at_high = clk_slow ^ counter_reset;
wire clk90_slow_is_at_low = ~(clk_slow ^ counter_reset);
wire clk90_slow_posedge = clk_slow & counter_reset;
assign clk_slow_posedge = clk_slow & ~counter_reset;
wire clk_slow_negedge = ~(clk_slow | counter_reset);
wire clk180_slow = ~clk_slow; // inverting clk_slow gives the 180 degree phase shift
assign clk180_slow_posedge = clk_slow_negedge;

// Outgoing data strobes are the divided clock (true) and its inverse (complement)
`ifdef USE_x16
assign {ldqs_w, udqs_w} = {2{clk_slow}};
assign {ldqs_n_w, udqs_n_w} = {2{~clk_slow}};
`else
assign dqs_w = clk_slow;
assign dqs_n_w = ~clk_slow;
`endif
`else
// wire clk_serdes_data;
// wire clk_serdes;
// PLL clock 'ck' and the forwarded copies that feed the CK / CK# output buffers
wire ck;
wire ck_out;
wire ck_180_out;
// ck_90 / ck_270 are module outputs in a TESTBENCH build, internal nets otherwise
`ifndef TESTBENCH
wire ck_90;
wire ck_270;
`endif
// lock indication of the static PLL
wire locked;
// dynamic phase shift: request strobe, completion flag, shifted clocks, lock flag
reg psen;
wire psdone;
wire ck_dynamic_90;
wire ck_dynamic_270;
wire locked_dynamic;
`ifdef XILINX
// For Artix-7, see https://www.reddit.com/r/FPGA/comments/u8kno6/place_30574_poor_placement_for_routing_between_an/
// Static PLL producing every fixed-phase clock used by the controller.
// SERDES_RATIO = 8, but 2 separate serdes are used due to double-data-rate restriction,
// so 333.333MHz divided by (SERDES_RATIO >> 1) equals 83.333MHz for the serdes clocks.
pll_ddr pll_static_clocks
(
    // Inputs
    .clk(clk),                         // IN  50MHz(Spartan-6 board), 100MHz(Artix-7 board)
    .reset(reset),                     // IN
    // DRAM-rate clocks
    .ck(ck),                           // OUT 333.333MHz, 0 phase shift
    .ck_90(ck_90),                     // OUT 333.333MHz, 90 phase shift, for dq phase shifting purpose
    .ck_180(ck_180),                   // OUT 333.333MHz, 180 phase shift
    .ck_270(ck_270),                   // OUT 333.333MHz, 270 phase shift, for dq phase shifting purpose
    // SERDES-rate clocks
    .clk_serdes(clk_serdes),           // OUT 83.333MHz, 45 phase shift, for DRAM command
    .clk_serdes_data(clk_serdes_data), // OUT 83.333MHz, 270 phase shift, for DRAM data
    // Status
    .locked(locked)                    // OUT
);
// ---------------------------------------------------------------------------
// Generation of 'psen', the dynamic phase shift enable fed to the tuneable PLL.
// Everything in this section is clocked by 'ck'.
// ---------------------------------------------------------------------------
localparam NUM_OF_FF_SYNCHRONIZERS_FOR_CK_180_DOMAIN_TO_CK_DOMAIN = 4;
// to synchronize signal in ck_180 domain to ck domain
// (the logic that loads this 4-bit register is not in this part of the file)
reg [NUM_OF_FF_SYNCHRONIZERS_FOR_CK_180_DOMAIN_TO_CK_DOMAIN-1:0] data_read_is_ongoing_ck;
// One-cycle-delayed copy used for rising-edge detection.
// NOTE(review): the right-hand side is 4 bits wide while this register is 1 bit,
// so only bit [0] of data_read_is_ongoing_ck is captured. Confirm that bit [0] is
// the intended (final) synchronizer stage.
reg data_read_is_ongoing_previous;
always @(posedge ck)
data_read_is_ongoing_previous <= data_read_is_ongoing_ck;
// One-cycle-delayed copy of psdone, used to detect its falling edge
reg psdone_previous;
always @(posedge ck) psdone_previous <= psdone;
always @(posedge ck)
begin
// triggers the first phase shift enable request only during the start of read operation
// NOTE(review): in this condition the 4-bit data_read_is_ongoing_ck is used as a logical
// operand, i.e. it is true when ANY of its bits is set, whereas the 'previous' copy holds
// bit [0] only. Confirm this mix is intended.
if(~data_read_is_ongoing_previous && data_read_is_ongoing_ck) psen <= 1;
// Phase shifting is like changing PLL settings, so need to wait for new PLL lock in order to avoid
// Warning : Please wait for PSDONE signal before adjusting the Phase Shift
// asserts psen signal only when psdone is asserted low after asserted high previously
// NOTE(review): this branch is taken only when psdone is 0, so 'psen <= psdone' always
// stores 0 and behaves the same as the final else. psen is therefore raised only by the
// read-start condition above, which does not match the comment. Confirm whether a
// repeated request ('psen <= 1') was intended here.
else if(psdone_previous && ~psdone) psen <= psdone;
// assert PSEN for one PSCLK cycle only and then wait for PSDONE to assert before performing
// another phase shift operation. Asserting PSEN for more than one PSCLK cycle can cause the DCM
// to phase shift in an unpredictable manner.
// NOTE(review): psen is generated on 'ck' while the PLL instance below is given udqs_r as
// its psclk. Verify that the one-'ck'-cycle pulse is seen correctly in the psclk domain.
else psen <= 0;
end
// Width of the status bus of the tuneable PLL (only connected in a non-VIVADO build)
localparam PLL_STATUS_BITWIDTH = 3;

// Extra status nets that exist only when VIVADO is not defined
`ifndef VIVADO
wire clk_valid;
wire input_clk_stopped;
wire [PLL_STATUS_BITWIDTH-1:0] pll_read_status;
`endif

// Tuneable PLL: dynamic phase shift for incoming DQ bits
pll_tuneable pll_read
(
    // Clock and reset inputs
    .clk(clk),                             // IN  50MHz
    .reset(reset),                         // IN
    // Dynamic phase shift interface
    .psclk(udqs_r),                        // IN
    .psen(psen),                           // IN
    .psincdec(1'b1),                       // IN
    .psdone(psdone),                       // OUT
`ifndef VIVADO
    // Additional status outputs
    .status(pll_read_status),              // OUT
    .input_clk_stopped(input_clk_stopped), // OUT
    .clk_valid(clk_valid),                 // OUT
`endif
    // Phase-shifted clock outputs
    .ck_dynamic_90(ck_dynamic_90),         // OUT 333.333MHz, 90 phase shift, incoming DQ bit is not phase shifted
    .ck_dynamic_270(ck_dynamic_270),       // OUT 333.333MHz, 270 phase shift
    // Lock status
    .locked_dynamic(locked_dynamic)        // OUT
);
// The outputs of the clock-forwarding ODDR2 primitives may not be routed into
// FPGA fabric, so each one goes to its I/O pad through a dedicated output buffer.

// CK pad
OBUF #(
    .IOSTANDARD("LVCMOS25"), // output I/O standard
    .SLEW("SLOW"),           // output slew rate
    .DRIVE(12)               // output drive strength
)
OBUF_ck (
    .I(ck_out),              // buffer input
    .O(ck_obuf)              // buffer output (connect directly to FPGA I/O pad)
);

// CK# pad
OBUF #(
    .IOSTANDARD("LVCMOS25"), // output I/O standard
    .SLEW("SLOW"),           // output slew rate
    .DRIVE(12)               // output drive strength
)
OBUF_ck_n (
    .I(ck_180_out),          // buffer input
    .O(ck_n_obuf)            // buffer output (connect directly to FPGA I/O pad)
);
// ODDR2: Input Double Data Rate Output Register with Set, Reset and Clock Enable.
// Spartan-6
// Xilinx HDL Libraries Guide, version 14.7
// As for why 'ck' and 'ck_180' signals are implemented using ODDR2 primitive,
// see https://forums.xilinx.com/t5/Other-FPGA-Architecture/Place-1198-Error-Route-cause-and-possible-solution/m-p/408489/highlight/true#M34528
//
// Clock forwarding: D0 = 1 is driven out on the rising edge of C0 and D1 = 0 on the
// rising edge of C1, so Q reproduces the C0 clock. C0 and C1 must be 180 degrees
// apart for this to work. Both instances previously had the same clock on C0 and C1;
// C1 is now the opposite phase, as in the DQS ODDR2 instances of this file.

// Forwarded copy of 'ck' (drives the CK pad through OBUF_ck)
ODDR2 #(
    .DDR_ALIGNMENT("C0"), // Sets output alignment to "NONE", "C0" or "C1"
    .INIT(1'b0), // Sets initial state of the Q output to 1'b0 or 1'b1
    .SRTYPE("ASYNC") // Specifies "SYNC" or "ASYNC" set/reset
)
ODDR2_ck_out(
    .Q(ck_out), // 1-bit DDR output data
    .C0(ck), // 1-bit clock input
    .C1(ck_180), // 1-bit clock input, opposite phase of C0
    .CE(1'b1), // 1-bit clock enable input
    .D0(1'b1), // 1-bit DDR data input (associated with C0)
    .D1(1'b0), // 1-bit DDR data input (associated with C1)
    .R(1'b0), // 1-bit reset input
    .S(1'b0) // 1-bit set input
);

// Forwarded copy of 'ck_180' (drives the CK# pad through OBUF_ck_n)
ODDR2 #(
    .DDR_ALIGNMENT("C0"), // Sets output alignment to "NONE", "C0" or "C1"
    .INIT(1'b0), // Sets initial state of the Q output to 1'b0 or 1'b1
    .SRTYPE("ASYNC") // Specifies "SYNC" or "ASYNC" set/reset
)
ODDR2_ck_180_out(
    .Q(ck_180_out), // 1-bit DDR output data
    .C0(ck_180), // 1-bit clock input
    .C1(ck), // 1-bit clock input, opposite phase of C0
    .CE(1'b1), // 1-bit clock enable input
    .D0(1'b1), // 1-bit DDR data input (associated with C0)
    .D1(1'b0), // 1-bit DDR data input (associated with C1)
    .R(1'b0), // 1-bit reset input
    .S(1'b0) // 1-bit set input
);
// DQS signals are double-data-rate signals
`ifdef USE_x16
// Lower-byte write strobe 'ldqs_w': DDR register fed with constant data,
// 1 on the C0 (ck) phase and 0 on the C1 (ck_180) phase.
ODDR2 #(
    .SRTYPE("ASYNC"),      // "SYNC" or "ASYNC" set/reset
    .INIT(1'b0),           // initial state of Q
    .DDR_ALIGNMENT("C0")   // output alignment: "NONE", "C0" or "C1"
)
ODDR2_ldqs_w(
    // clocks
    .C0(ck),
    .C1(ck_180),
    // data associated with C0 / C1 respectively
    .D0(1'b1),
    .D1(1'b0),
    // clock enable tied on, set/reset tied off
    .CE(1'b1),
    .R(1'b0),
    .S(1'b0),
    // DDR output
    .Q(ldqs_w)
);
// Upper-byte write strobe 'udqs_w': DDR register fed with constant data,
// 1 on the C0 (ck) phase and 0 on the C1 (ck_180) phase.
ODDR2 #(
    .SRTYPE("ASYNC"),      // "SYNC" or "ASYNC" set/reset
    .INIT(1'b0),           // initial state of Q
    .DDR_ALIGNMENT("C0")   // output alignment: "NONE", "C0" or "C1"
)
ODDR2_udqs_w(
    // clocks
    .C0(ck),
    .C1(ck_180),
    // data associated with C0 / C1 respectively
    .D0(1'b1),
    .D1(1'b0),
    // clock enable tied on, set/reset tied off
    .CE(1'b1),
    .R(1'b0),
    .S(1'b0),
    // DDR output
    .Q(udqs_w)
);
// Lower-byte complementary write strobe 'ldqs_n_w': same clocking as 'ldqs_w'
// but with the data inputs swapped (0 on the C0 phase, 1 on the C1 phase).
ODDR2 #(
    .SRTYPE("ASYNC"),      // "SYNC" or "ASYNC" set/reset
    .INIT(1'b0),           // initial state of Q
    .DDR_ALIGNMENT("C0")   // output alignment: "NONE", "C0" or "C1"
)
ODDR2_ldqs_n_w(
    // clocks
    .C0(ck),
    .C1(ck_180),
    // data associated with C0 / C1 respectively
    .D0(1'b0),
    .D1(1'b1),
    // clock enable tied on, set/reset tied off
    .CE(1'b1),
    .R(1'b0),
    .S(1'b0),
    // DDR output
    .Q(ldqs_n_w)
);
// Upper-byte complementary write strobe 'udqs_n_w': same clocking as 'udqs_w'
// but with the data inputs swapped (0 on the C0 phase, 1 on the C1 phase).
ODDR2 #(
    .SRTYPE("ASYNC"),      // "SYNC" or "ASYNC" set/reset
    .INIT(1'b0),           // initial state of Q
    .DDR_ALIGNMENT("C0")   // output alignment: "NONE", "C0" or "C1"
)
ODDR2_udqs_n_w(
    // clocks
    .C0(ck),
    .C1(ck_180),
    // data associated with C0 / C1 respectively
    .D0(1'b0),
    .D1(1'b1),
    // clock enable tied on, set/reset tied off
    .CE(1'b1),
    .R(1'b0),
    .S(1'b0),
    // DDR output
    .Q(udqs_n_w)
);
`else
// Write strobe 'dqs_w' for the non-x16 configuration: DDR register fed with
// constant data, 1 on the C0 (ck) phase and 0 on the C1 (ck_180) phase.
ODDR2 #(
    .SRTYPE("SYNC"),       // "SYNC" or "ASYNC" set/reset
    .INIT(1'b0),           // initial state of Q
    .DDR_ALIGNMENT("C0")   // output alignment: "NONE", "C0" or "C1"
)
ODDR2_dqs_w(
    // clocks
    .C0(ck),
    .C1(ck_180),
    // data associated with C0 / C1 respectively
    .D0(1'b1),
    .D1(1'b0),
    // clock enable tied on, set/reset tied off
    .CE(1'b1),
    .R(1'b0),
    .S(1'b0),
    // DDR output
    .Q(dqs_w)
);
// Complementary write strobe 'dqs_n_w' for the non-x16 configuration.
// The data inputs are the inverse of those on ODDR2_dqs_w (0 on the C0 phase,
// 1 on the C1 phase) so that dqs_n_w is the complement of dqs_w, matching the
// ldqs_n_w / udqs_n_w instances of the x16 configuration. Previously D0/D1 were
// identical to ODDR2_dqs_w, which made dqs_n_w a copy of dqs_w.
ODDR2 #(
    .DDR_ALIGNMENT("C0"),  // Sets output alignment to "NONE", "C0" or "C1"
    .INIT(1'b0),           // Sets initial state of the Q output to 1'b0 or 1'b1
    .SRTYPE("SYNC")        // Specifies "SYNC" or "ASYNC" set/reset
)
ODDR2_dqs_n_w(
    .Q(dqs_n_w),   // 1-bit DDR output data
    .C0(ck),       // 1-bit clock input
    .C1(ck_180),   // 1-bit clock input
    .CE(1'b1),     // 1-bit clock enable input (always enabled)
    .D0(1'b0),     // 1-bit DDR data input (associated with C0)
    .D1(1'b1),     // 1-bit DDR data input (associated with C1)
    .R(1'b0),      // 1-bit reset input (unused)
    .S(1'b0)       // 1-bit set input (unused)
);
`endif
`elsif ALTERA
// Static PLL for the Altera flow: takes the board clock 'clk' and produces the
// four DDR clock phases plus the SERDES clock.
pll_ddr pll_static_clocks
(
    // Status and control signals
    .areset(reset),     // IN
    .locked(locked),    // OUT

    // Clock in port
    .inclk0(clk),       // IN 50MHz

    // Clock out ports, 333.333MHz
    .c0(ck),            // OUT 0 phase shift
    .c1(ck_90),         // OUT 90 phase shift, for dq phase shifting purpose
    .c2(ck_180),        // OUT 180 phase shift
    .c3(ck_270),        // OUT 270 phase shift, for dq phase shifting purpose

    //.clk_pll(clk_pll), // OUT 83.333MHz, 45 phase shift, for solving STA issues

    // SERDES_RATIO = 8, but 2 separate serdes are used due to double-data-rate restriction
    // So, 333.333MHz divided by (SERDES_RATIO >> 1) equals 83.333MHz
    .c4(clk_serdes)     // OUT 83.333MHz, 45 phase shift, for SERDES use
);
// dynamic phase shift for incoming DQ bits
// Dynamically phase-adjustable PLL (Altera flow). A module instantiation must
// carry an instance name, so the instance is named 'pll_dynamic_clocks' here;
// the original omitted the name.
// NOTE(review): 'phasecounterselect' is driven by 'udqs_r', a data-path signal;
// this input normally selects which PLL counter is stepped — confirm the
// intended connection and its width against the generated pll_tuneable module.
pll_tuneable pll_dynamic_clocks
(
    .areset(reset),                // IN
    .inclk0(clk),                  // IN 50 MHz
    .pfdena(1'b1),                 // IN
    .phasecounterselect(udqs_r),   // IN
    .phasestep(psen),              // IN
    .phaseupdown(1'b1),            // IN
    .scanclk(clk),                 // IN
    .c0(ck_dynamic_90),            // OUT 333.333MHz, 90 phase shift, incoming DQ bit is not phase shifted
    .c1(ck_dynamic_270),           // OUT 333.333MHz, 270 phase shift
    .locked(locked_dynamic),       // OUT
    .phasedone(psdone)             // OUT
);
`endif
`endif
// See https://www.micron.com/-/media/client/global/documents/products/technical-note/dram/tn4605.pdf#page=7
// for an overview on DQS Preamble and Postamble bits
`ifndef HIGH_SPEED
// Half-width (DQ_BITWIDTH >> 1) write-data nets for the lower and upper byte lanes
wire [(DQ_BITWIDTH >> 1)-1:0] ldq_w, udq_w;

// DQS level observed on the previous 'clk' cycle, used for edge detection
reg dqs_is_at_high_previously, dqs_is_at_low_previously;
// DQS level and edge detection.
// Without USE_ILA the four nets are declared locally; with USE_ILA only the
// continuous assignments are emitted (the nets are presumably declared
// elsewhere, e.g. as debug ports — verify against the module header).
`ifndef USE_ILA
    wire dqs_is_at_high;
    wire dqs_is_at_low;
    wire dqs_rising_edge;
    wire dqs_falling_edge;
`endif

// A strobe counts as "high" when the true line is 1 and its complement is 0,
// and as "low" in the opposite case. In x16 mode either byte lane qualifies.
`ifdef USE_x16
    assign dqs_is_at_high = (ldqs_r & ~ldqs_n_r) || (udqs_r & ~udqs_n_r);
    assign dqs_is_at_low = (~ldqs_r & ldqs_n_r) || (~udqs_r & udqs_n_r);
`else
    assign dqs_is_at_high = (dqs & ~dqs_n);
    assign dqs_is_at_low = (~dqs & dqs_n);
`endif

// An edge is a change of level between the previous and the current sample
assign dqs_rising_edge = (dqs_is_at_low_previously && dqs_is_at_high);
assign dqs_falling_edge = (dqs_is_at_high_previously && dqs_is_at_low);
// Remember the DQS level seen on this 'clk' edge for next cycle's edge detection
always @(posedge clk)
begin
    dqs_is_at_high_previously <= dqs_is_at_high;
    dqs_is_at_low_previously  <= dqs_is_at_low;
end
// For WRITE, DQS has to be phase-shifted by 90 degrees before it is output to the RAM.
// For incoming data, the dqs and dqs_n signals are phase-shifted by 90 degrees
// with reference to the outgoing 'ck' DDR signal,
// so that the incoming `dq` signal is sampled at its middle.
// Counter of 'clk' cycles elapsed since the last detected DQS edge.
// It is $clog2(DIVIDE_RATIO_HALVED) bits wide and declared here only when
// USE_ILA is not defined (with USE_ILA it is presumably declared elsewhere —
// verify against the module header).
`ifndef USE_ILA
reg [($clog2(DIVIDE_RATIO_HALVED)-1):0] dqs_counter;
`endif
// dqs_counter: restarts at 1 on every detected DQS edge, then keeps counting
// 'clk' cycles while it is non-zero; it holds at 0 until the next edge.
//
// Due to PCB trace layout and high-speed DDR signal transmission,
// there is no alignment to any generic clock signal that we can depend upon,
// especially when data is coming back from the SDRAM chip.
// Thus, we could only depend upon incoming `DQS` signal to sample 'DQ' signal
always @(posedge clk)
begin
    if(reset)
        dqs_counter <= 0;
    else if(dqs_rising_edge | dqs_falling_edge)
        dqs_counter <= 1;
    else if(dqs_counter > 0)
        dqs_counter <= dqs_counter + 1;
end
// 'dqs_phase_shifted' is asserted while dqs_counter equals the low bits of
// DIVIDE_RATIO_HALVED. The non-XILINX build slices $clog2(DIVIDE_RATIO_HALVED)
// bits; the XILINX build uses a fixed 2-bit slice.
// NOTE(review): if DIVIDE_RATIO_HALVED is an exact power of two, its low
// $clog2(DIVIDE_RATIO_HALVED) bits are all zero, so the non-XILINX compare
// target would be 0 (the idle value of dqs_counter) — confirm the parameter
// value and the intended counter width.
`ifndef XILINX
wire dqs_phase_shifted = (dqs_counter == DIVIDE_RATIO_HALVED[0 +: $clog2(DIVIDE_RATIO_HALVED)]);
`else
wire dqs_phase_shifted = (dqs_counter == DIVIDE_RATIO_HALVED[0 +: 2]);
`endif
// Logical complement of the phase-shifted strobe
wire dqs_n_phase_shifted = ~dqs_phase_shifted;
// Read-data capture register: cleared on reset, otherwise loaded when the
// phase-shifted strobe is asserted (and its complement is deasserted).
always @(posedge clk)
begin
    if(reset)
    begin
        data_from_ram <= 0;
    end

    // 'dq_r' is sampled at its middle (thanks to 90 degree phase shift on dqs)
    else if(dqs_phase_shifted & ~dqs_n_phase_shifted)
    begin
        // The data source depends on the target: FPGA builds read 'dq_r',
        // any other build reads 'dq' directly.
        `ifdef XILINX
            data_from_ram <= dq_r;
        `elsif LATTICE
            data_from_ram <= dq_r;
        `else  // Micron DDR3 simulation model
            data_from_ram <= dq;
        `endif
    end
end
`ifdef USE_x16
wire [(DQ_BITWIDTH >> 1)-1:0] ldq;
wire [(DQ_BITWIDTH >> 1)-1:0] udq;